/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "main/context.h"
25 #include "main/teximage.h"
26 #include "main/blend.h"
27 #include "main/bufferobj.h"
28 #include "main/enums.h"
29 #include "main/fbobject.h"
30 #include "main/image.h"
31 #include "main/renderbuffer.h"
32 #include "main/glformats.h"
34 #include "brw_blorp.h"
35 #include "brw_context.h"
36 #include "brw_defines.h"
37 #include "brw_meta_util.h"
38 #include "brw_state.h"
39 #include "intel_buffer_objects.h"
40 #include "intel_fbo.h"
41 #include "common/gen_debug.h"
43 #define FILE_DEBUG_FLAG DEBUG_BLORP
46 brw_blorp_lookup_shader(struct blorp_context
*blorp
,
47 const void *key
, uint32_t key_size
,
48 uint32_t *kernel_out
, void *prog_data_out
)
50 struct brw_context
*brw
= blorp
->driver_ctx
;
51 return brw_search_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
, key
, key_size
,
52 kernel_out
, prog_data_out
, true);
56 brw_blorp_upload_shader(struct blorp_context
*blorp
,
57 const void *key
, uint32_t key_size
,
58 const void *kernel
, uint32_t kernel_size
,
59 const struct brw_stage_prog_data
*prog_data
,
60 uint32_t prog_data_size
,
61 uint32_t *kernel_out
, void *prog_data_out
)
63 struct brw_context
*brw
= blorp
->driver_ctx
;
64 brw_upload_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
, key
, key_size
,
65 kernel
, kernel_size
, prog_data
, prog_data_size
,
66 kernel_out
, prog_data_out
);
71 brw_blorp_init(struct brw_context
*brw
)
73 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
75 blorp_init(&brw
->blorp
, brw
, &brw
->isl_dev
);
77 brw
->blorp
.compiler
= brw
->screen
->compiler
;
79 switch (devinfo
->gen
) {
81 if (devinfo
->is_g4x
) {
82 brw
->blorp
.exec
= gen45_blorp_exec
;
84 brw
->blorp
.exec
= gen4_blorp_exec
;
88 brw
->blorp
.exec
= gen5_blorp_exec
;
91 brw
->blorp
.exec
= gen6_blorp_exec
;
94 if (devinfo
->is_haswell
) {
95 brw
->blorp
.exec
= gen75_blorp_exec
;
97 brw
->blorp
.exec
= gen7_blorp_exec
;
101 brw
->blorp
.exec
= gen8_blorp_exec
;
104 brw
->blorp
.exec
= gen9_blorp_exec
;
107 brw
->blorp
.exec
= gen10_blorp_exec
;
110 brw
->blorp
.exec
= gen11_blorp_exec
;
114 unreachable("Invalid gen");
117 brw
->blorp
.lookup_shader
= brw_blorp_lookup_shader
;
118 brw
->blorp
.upload_shader
= brw_blorp_upload_shader
;
122 blorp_surf_for_miptree(struct brw_context
*brw
,
123 struct blorp_surf
*surf
,
124 const struct intel_mipmap_tree
*mt
,
125 enum isl_aux_usage aux_usage
,
126 bool is_render_target
,
128 unsigned start_layer
, unsigned num_layers
,
129 struct isl_surf tmp_surfs
[1])
131 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
133 if (mt
->surf
.msaa_layout
== ISL_MSAA_LAYOUT_ARRAY
) {
134 const unsigned num_samples
= mt
->surf
.samples
;
135 for (unsigned i
= 0; i
< num_layers
; i
++) {
136 for (unsigned s
= 0; s
< num_samples
; s
++) {
137 const unsigned phys_layer
= (start_layer
+ i
) * num_samples
+ s
;
138 intel_miptree_check_level_layer(mt
, *level
, phys_layer
);
142 for (unsigned i
= 0; i
< num_layers
; i
++)
143 intel_miptree_check_level_layer(mt
, *level
, start_layer
+ i
);
146 *surf
= (struct blorp_surf
) {
148 .addr
= (struct blorp_address
) {
150 .offset
= mt
->offset
,
151 .reloc_flags
= is_render_target
? EXEC_OBJECT_WRITE
: 0,
152 .mocs
= brw_get_bo_mocs(devinfo
, mt
->bo
),
154 .aux_usage
= aux_usage
,
155 .tile_x_sa
= mt
->level
[*level
].level_x
,
156 .tile_y_sa
= mt
->level
[*level
].level_y
,
159 if (surf
->aux_usage
== ISL_AUX_USAGE_HIZ
&&
160 !intel_miptree_level_has_hiz(mt
, *level
))
161 surf
->aux_usage
= ISL_AUX_USAGE_NONE
;
163 if (surf
->aux_usage
!= ISL_AUX_USAGE_NONE
) {
164 /* We only really need a clear color if we also have an auxiliary
165 * surface. Without one, it does nothing.
168 intel_miptree_get_clear_color(devinfo
, mt
, mt
->surf
.format
,
169 !is_render_target
, (struct brw_bo
**)
170 &surf
->clear_color_addr
.buffer
,
171 &surf
->clear_color_addr
.offset
);
173 surf
->aux_surf
= &mt
->aux_buf
->surf
;
174 surf
->aux_addr
= (struct blorp_address
) {
175 .reloc_flags
= is_render_target
? EXEC_OBJECT_WRITE
: 0,
176 .mocs
= surf
->addr
.mocs
,
179 surf
->aux_addr
.buffer
= mt
->aux_buf
->bo
;
180 surf
->aux_addr
.offset
= mt
->aux_buf
->offset
;
182 surf
->aux_addr
= (struct blorp_address
) {
185 memset(&surf
->clear_color
, 0, sizeof(surf
->clear_color
));
187 assert((surf
->aux_usage
== ISL_AUX_USAGE_NONE
) ==
188 (surf
->aux_addr
.buffer
== NULL
));
190 if (!is_render_target
&& brw
->screen
->devinfo
.gen
== 9)
191 gen9_apply_single_tex_astc5x5_wa(brw
, mt
->format
, surf
->aux_usage
);
193 /* ISL wants real levels, not offset ones. */
194 *level
-= mt
->first_level
;
198 brw_blorp_supports_dst_format(struct brw_context
*brw
, mesa_format format
)
200 /* If it's renderable, it's definitely supported. */
201 if (brw
->mesa_format_supports_render
[format
])
204 /* BLORP can't compress anything */
205 if (_mesa_is_format_compressed(format
))
208 /* No exotic formats such as GL_LUMINANCE_ALPHA */
209 if (_mesa_get_format_bits(format
, GL_RED_BITS
) == 0 &&
210 _mesa_get_format_bits(format
, GL_DEPTH_BITS
) == 0 &&
211 _mesa_get_format_bits(format
, GL_STENCIL_BITS
) == 0)
217 static enum isl_format
218 brw_blorp_to_isl_format(struct brw_context
*brw
, mesa_format format
,
219 bool is_render_target
)
222 case MESA_FORMAT_NONE
:
223 return ISL_FORMAT_UNSUPPORTED
;
224 case MESA_FORMAT_S_UINT8
:
225 return ISL_FORMAT_R8_UINT
;
226 case MESA_FORMAT_Z24_UNORM_X8_UINT
:
227 case MESA_FORMAT_Z24_UNORM_S8_UINT
:
228 return ISL_FORMAT_R24_UNORM_X8_TYPELESS
;
229 case MESA_FORMAT_Z_FLOAT32
:
230 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT
:
231 return ISL_FORMAT_R32_FLOAT
;
232 case MESA_FORMAT_Z_UNORM16
:
233 return ISL_FORMAT_R16_UNORM
;
235 if (is_render_target
) {
236 assert(brw_blorp_supports_dst_format(brw
, format
));
237 if (brw
->mesa_format_supports_render
[format
]) {
238 return brw
->mesa_to_isl_render_format
[format
];
240 return brw_isl_format_for_mesa_format(format
);
243 /* Some destinations (is_render_target == true) are supported by
244 * blorp even though we technically can't render to them.
246 return brw_isl_format_for_mesa_format(format
);
252 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
253 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
255 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
258 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
260 * which is simply adding 4 then modding by 8 (or anding with 7).
262 * We then may need to apply workarounds for textureGather hardware bugs.
264 static enum isl_channel_select
265 swizzle_to_scs(GLenum swizzle
)
267 return (enum isl_channel_select
)((swizzle
+ 4) & 7);
271 * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
272 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
273 * the physical layer holding sample 0. So, for example, if
274 * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
278 brw_blorp_blit_miptrees(struct brw_context
*brw
,
279 struct intel_mipmap_tree
*src_mt
,
280 unsigned src_level
, unsigned src_layer
,
281 mesa_format src_format
, int src_swizzle
,
282 struct intel_mipmap_tree
*dst_mt
,
283 unsigned dst_level
, unsigned dst_layer
,
284 mesa_format dst_format
,
285 float src_x0
, float src_y0
,
286 float src_x1
, float src_y1
,
287 float dst_x0
, float dst_y0
,
288 float dst_x1
, float dst_y1
,
289 GLenum gl_filter
, bool mirror_x
, bool mirror_y
,
290 bool decode_srgb
, bool encode_srgb
)
292 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
294 DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f) "
295 "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
297 src_mt
->surf
.samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
298 src_level
, src_layer
, src_x0
, src_y0
, src_x1
, src_y1
,
299 dst_mt
->surf
.samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
300 dst_level
, dst_layer
, dst_x0
, dst_y0
, dst_x1
, dst_y1
,
303 if (!decode_srgb
&& _mesa_get_format_color_encoding(src_format
) == GL_SRGB
)
304 src_format
= _mesa_get_srgb_format_linear(src_format
);
306 if (!encode_srgb
&& _mesa_get_format_color_encoding(dst_format
) == GL_SRGB
)
307 dst_format
= _mesa_get_srgb_format_linear(dst_format
);
309 /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F
310 * texture, the above code configures the source format for L32_FLOAT or
311 * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge,
312 * the SAMPLE message appears to handle multisampled L32_FLOAT and
313 * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work
314 * around the problem by using a source format of R32_FLOAT. This
315 * shouldn't affect rendering correctness, since the destination format is
316 * R32_FLOAT, so only the contents of the red channel matters.
318 if (devinfo
->gen
== 6 &&
319 src_mt
->surf
.samples
> 1 && dst_mt
->surf
.samples
<= 1 &&
320 src_mt
->format
== dst_mt
->format
&&
321 (dst_format
== MESA_FORMAT_L_FLOAT32
||
322 dst_format
== MESA_FORMAT_I_FLOAT32
)) {
323 src_format
= dst_format
= MESA_FORMAT_R_FLOAT32
;
326 enum blorp_filter blorp_filter
;
327 if (fabsf(dst_x1
- dst_x0
) == fabsf(src_x1
- src_x0
) &&
328 fabsf(dst_y1
- dst_y0
) == fabsf(src_y1
- src_y0
)) {
329 if (src_mt
->surf
.samples
> 1 && dst_mt
->surf
.samples
<= 1) {
330 /* From the OpenGL ES 3.2 specification, section 16.2.1:
332 * "If the read framebuffer is multisampled (its effective value
333 * of SAMPLE_BUFFERS is one) and the draw framebuffer is not (its
334 * value of SAMPLE_BUFFERS is zero), the samples corresponding to
335 * each pixel location in the source are converted to a single
336 * sample before being written to the destination. The filter
337 * parameter is ignored. If the source formats are integer types
338 * or stencil values, a single sample’s value is selected for each
339 * pixel. If the source formats are floating-point or normalized
340 * types, the sample values for each pixel are resolved in an
341 * implementation-dependent manner. If the source formats are
342 * depth values, sample values are resolved in an implementation-
343 * dependent manner where the result will be between the minimum
344 * and maximum depth values in the pixel."
346 * For depth and stencil resolves, we choose to always use the value
349 GLenum base_format
= _mesa_get_format_base_format(src_mt
->format
);
350 if (base_format
== GL_DEPTH_COMPONENT
||
351 base_format
== GL_STENCIL_INDEX
||
352 base_format
== GL_DEPTH_STENCIL
||
353 _mesa_is_format_integer(src_mt
->format
)) {
354 /* The OpenGL ES 3.2 spec says:
356 * "If the source formats are integer types or stencil values,
357 * a single sample's value is selected for each pixel."
359 * Just take sample 0 in this case.
361 blorp_filter
= BLORP_FILTER_SAMPLE_0
;
363 blorp_filter
= BLORP_FILTER_AVERAGE
;
366 /* From the OpenGL 4.6 specification, section 18.3.1:
368 * "If the source and destination dimensions are identical, no
369 * filtering is applied."
371 * Using BLORP_FILTER_NONE will also handle the upsample case by
372 * replicating the one value in the source to all values in the
375 blorp_filter
= BLORP_FILTER_NONE
;
377 } else if (gl_filter
== GL_LINEAR
||
378 gl_filter
== GL_SCALED_RESOLVE_FASTEST_EXT
||
379 gl_filter
== GL_SCALED_RESOLVE_NICEST_EXT
) {
380 blorp_filter
= BLORP_FILTER_BILINEAR
;
382 blorp_filter
= BLORP_FILTER_NEAREST
;
385 enum isl_format src_isl_format
=
386 brw_blorp_to_isl_format(brw
, src_format
, false);
387 enum isl_aux_usage src_aux_usage
=
388 intel_miptree_texture_aux_usage(brw
, src_mt
, src_isl_format
,
389 0 /* The astc5x5 WA isn't needed */);
390 /* We do format workarounds for some depth formats so we can't reliably
391 * sample with HiZ. One of these days, we should fix that.
393 if (src_aux_usage
== ISL_AUX_USAGE_HIZ
)
394 src_aux_usage
= ISL_AUX_USAGE_NONE
;
395 const bool src_clear_supported
=
396 src_aux_usage
!= ISL_AUX_USAGE_NONE
&& src_mt
->format
== src_format
;
397 intel_miptree_prepare_access(brw
, src_mt
, src_level
, 1, src_layer
, 1,
398 src_aux_usage
, src_clear_supported
);
400 enum isl_format dst_isl_format
=
401 brw_blorp_to_isl_format(brw
, dst_format
, true);
402 enum isl_aux_usage dst_aux_usage
=
403 intel_miptree_render_aux_usage(brw
, dst_mt
, dst_isl_format
,
405 const bool dst_clear_supported
= dst_aux_usage
!= ISL_AUX_USAGE_NONE
;
406 intel_miptree_prepare_access(brw
, dst_mt
, dst_level
, 1, dst_layer
, 1,
407 dst_aux_usage
, dst_clear_supported
);
409 struct isl_surf tmp_surfs
[2];
410 struct blorp_surf src_surf
, dst_surf
;
411 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, src_aux_usage
, false,
412 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
413 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, dst_aux_usage
, true,
414 &dst_level
, dst_layer
, 1, &tmp_surfs
[1]);
416 struct isl_swizzle src_isl_swizzle
= {
417 .r
= swizzle_to_scs(GET_SWZ(src_swizzle
, 0)),
418 .g
= swizzle_to_scs(GET_SWZ(src_swizzle
, 1)),
419 .b
= swizzle_to_scs(GET_SWZ(src_swizzle
, 2)),
420 .a
= swizzle_to_scs(GET_SWZ(src_swizzle
, 3)),
423 struct blorp_batch batch
;
424 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
425 blorp_blit(&batch
, &src_surf
, src_level
, src_layer
,
426 src_isl_format
, src_isl_swizzle
,
427 &dst_surf
, dst_level
, dst_layer
,
428 dst_isl_format
, ISL_SWIZZLE_IDENTITY
,
429 src_x0
, src_y0
, src_x1
, src_y1
,
430 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
431 blorp_filter
, mirror_x
, mirror_y
);
432 blorp_batch_finish(&batch
);
434 intel_miptree_finish_write(brw
, dst_mt
, dst_level
, dst_layer
, 1,
439 brw_blorp_copy_miptrees(struct brw_context
*brw
,
440 struct intel_mipmap_tree
*src_mt
,
441 unsigned src_level
, unsigned src_layer
,
442 struct intel_mipmap_tree
*dst_mt
,
443 unsigned dst_level
, unsigned dst_layer
,
444 unsigned src_x
, unsigned src_y
,
445 unsigned dst_x
, unsigned dst_y
,
446 unsigned src_width
, unsigned src_height
)
448 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
450 DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d"
451 "to %dx %s mt %p %d %d (%d,%d)\n",
453 src_mt
->surf
.samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
454 src_level
, src_layer
, src_x
, src_y
, src_width
, src_height
,
455 dst_mt
->surf
.samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
456 dst_level
, dst_layer
, dst_x
, dst_y
);
458 enum isl_aux_usage src_aux_usage
, dst_aux_usage
;
459 bool src_clear_supported
, dst_clear_supported
;
461 switch (src_mt
->aux_usage
) {
462 case ISL_AUX_USAGE_MCS
:
463 case ISL_AUX_USAGE_CCS_E
:
464 src_aux_usage
= src_mt
->aux_usage
;
465 /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since
466 * we're going to re-interpret the format as an integer format possibly
467 * with a different number of components, we can't handle clear colors
470 src_clear_supported
= devinfo
->gen
>= 9;
473 src_aux_usage
= ISL_AUX_USAGE_NONE
;
474 src_clear_supported
= false;
478 switch (dst_mt
->aux_usage
) {
479 case ISL_AUX_USAGE_MCS
:
480 case ISL_AUX_USAGE_CCS_E
:
481 dst_aux_usage
= dst_mt
->aux_usage
;
482 /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since
483 * we're going to re-interpret the format as an integer format possibly
484 * with a different number of components, we can't handle clear colors
487 dst_clear_supported
= devinfo
->gen
>= 9;
490 dst_aux_usage
= ISL_AUX_USAGE_NONE
;
491 dst_clear_supported
= false;
495 intel_miptree_prepare_access(brw
, src_mt
, src_level
, 1, src_layer
, 1,
496 src_aux_usage
, src_clear_supported
);
497 intel_miptree_prepare_access(brw
, dst_mt
, dst_level
, 1, dst_layer
, 1,
498 dst_aux_usage
, dst_clear_supported
);
500 struct isl_surf tmp_surfs
[2];
501 struct blorp_surf src_surf
, dst_surf
;
502 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, src_aux_usage
, false,
503 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
504 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, dst_aux_usage
, true,
505 &dst_level
, dst_layer
, 1, &tmp_surfs
[1]);
507 /* The hardware seems to have issues with having a two different format
508 * views of the same texture in the sampler cache at the same time. It's
509 * unclear exactly what the issue is but it hurts glCopyImageSubData
510 * particularly badly because it does a lot of format reinterprets. We
511 * badly need better understanding of the issue and a better fix but this
512 * works for now and fixes CTS tests.
514 * TODO: Remove this hack!
516 brw_emit_pipe_control_flush(brw
, PIPE_CONTROL_CS_STALL
|
517 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
);
519 struct blorp_batch batch
;
520 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
521 blorp_copy(&batch
, &src_surf
, src_level
, src_layer
,
522 &dst_surf
, dst_level
, dst_layer
,
523 src_x
, src_y
, dst_x
, dst_y
, src_width
, src_height
);
524 blorp_batch_finish(&batch
);
526 brw_emit_pipe_control_flush(brw
, PIPE_CONTROL_CS_STALL
|
527 PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE
);
529 intel_miptree_finish_write(brw
, dst_mt
, dst_level
, dst_layer
, 1,
534 brw_blorp_copy_buffers(struct brw_context
*brw
,
535 struct brw_bo
*src_bo
,
537 struct brw_bo
*dst_bo
,
541 DBG("%s %d bytes from %p[%d] to %p[%d]",
542 __func__
, size
, src_bo
, src_offset
, dst_bo
, dst_offset
);
544 struct blorp_batch batch
;
545 struct blorp_address src
= { .buffer
= src_bo
, .offset
= src_offset
};
546 struct blorp_address dst
= { .buffer
= dst_bo
, .offset
= dst_offset
};
548 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
549 blorp_buffer_copy(&batch
, src
, dst
, size
);
550 blorp_batch_finish(&batch
);
554 static struct intel_mipmap_tree
*
555 find_miptree(GLbitfield buffer_bit
, struct intel_renderbuffer
*irb
)
557 struct intel_mipmap_tree
*mt
= irb
->mt
;
558 if (buffer_bit
== GL_STENCIL_BUFFER_BIT
&& mt
->stencil_mt
)
564 blorp_get_texture_swizzle(const struct intel_renderbuffer
*irb
)
566 return irb
->Base
.Base
._BaseFormat
== GL_RGB
?
567 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
) :
572 do_blorp_blit(struct brw_context
*brw
, GLbitfield buffer_bit
,
573 struct intel_renderbuffer
*src_irb
, mesa_format src_format
,
574 struct intel_renderbuffer
*dst_irb
, mesa_format dst_format
,
575 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
576 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
577 GLenum filter
, bool mirror_x
, bool mirror_y
)
579 const struct gl_context
*ctx
= &brw
->ctx
;
581 /* Find source/dst miptrees */
582 struct intel_mipmap_tree
*src_mt
= find_miptree(buffer_bit
, src_irb
);
583 struct intel_mipmap_tree
*dst_mt
= find_miptree(buffer_bit
, dst_irb
);
585 const bool do_srgb
= ctx
->Color
.sRGBEnabled
;
588 brw_blorp_blit_miptrees(brw
,
589 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
590 src_format
, blorp_get_texture_swizzle(src_irb
),
591 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
,
593 srcX0
, srcY0
, srcX1
, srcY1
,
594 dstX0
, dstY0
, dstX1
, dstY1
,
595 filter
, mirror_x
, mirror_y
,
598 dst_irb
->need_downsample
= true;
602 try_blorp_blit(struct brw_context
*brw
,
603 const struct gl_framebuffer
*read_fb
,
604 const struct gl_framebuffer
*draw_fb
,
605 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
606 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
607 GLenum filter
, GLbitfield buffer_bit
)
609 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
610 struct gl_context
*ctx
= &brw
->ctx
;
612 /* Sync up the state of window system buffers. We need to do this before
613 * we go looking for the buffers.
615 intel_prepare_render(brw
);
617 bool mirror_x
, mirror_y
;
618 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
619 &srcX0
, &srcY0
, &srcX1
, &srcY1
,
620 &dstX0
, &dstY0
, &dstX1
, &dstY1
,
621 &mirror_x
, &mirror_y
))
625 struct intel_renderbuffer
*src_irb
;
626 struct intel_renderbuffer
*dst_irb
;
627 struct intel_mipmap_tree
*src_mt
;
628 struct intel_mipmap_tree
*dst_mt
;
629 switch (buffer_bit
) {
630 case GL_COLOR_BUFFER_BIT
:
631 src_irb
= intel_renderbuffer(read_fb
->_ColorReadBuffer
);
632 for (unsigned i
= 0; i
< draw_fb
->_NumColorDrawBuffers
; ++i
) {
633 dst_irb
= intel_renderbuffer(draw_fb
->_ColorDrawBuffers
[i
]);
635 do_blorp_blit(brw
, buffer_bit
,
636 src_irb
, src_irb
->Base
.Base
.Format
,
637 dst_irb
, dst_irb
->Base
.Base
.Format
,
638 srcX0
, srcY0
, srcX1
, srcY1
,
639 dstX0
, dstY0
, dstX1
, dstY1
,
640 filter
, mirror_x
, mirror_y
);
643 case GL_DEPTH_BUFFER_BIT
:
645 intel_renderbuffer(read_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
647 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
648 src_mt
= find_miptree(buffer_bit
, src_irb
);
649 dst_mt
= find_miptree(buffer_bit
, dst_irb
);
651 /* We also can't handle any combined depth-stencil formats because we
652 * have to reinterpret as a color format.
654 if (_mesa_get_format_base_format(src_mt
->format
) == GL_DEPTH_STENCIL
||
655 _mesa_get_format_base_format(dst_mt
->format
) == GL_DEPTH_STENCIL
)
658 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
659 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
660 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
661 filter
, mirror_x
, mirror_y
);
663 case GL_STENCIL_BUFFER_BIT
:
664 /* Blorp doesn't support combined depth stencil which is all we have
667 if (devinfo
->gen
< 6)
671 intel_renderbuffer(read_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
673 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
674 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
675 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
676 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
677 filter
, mirror_x
, mirror_y
);
680 unreachable("not reached");
/* Flip a [y0, y1) range vertically within a surface of the given height,
 * used to convert between top-left and bottom-left origin conventions.
 */
static void
apply_y_flip(int *y0, int *y1, int height)
{
   int tmp = height - *y0;
   *y0 = height - *y1;
   *y1 = tmp;
}
695 brw_blorp_copytexsubimage(struct brw_context
*brw
,
696 struct gl_renderbuffer
*src_rb
,
697 struct gl_texture_image
*dst_image
,
699 int srcX0
, int srcY0
,
700 int dstX0
, int dstY0
,
701 int width
, int height
)
703 struct gl_context
*ctx
= &brw
->ctx
;
704 struct intel_renderbuffer
*src_irb
= intel_renderbuffer(src_rb
);
705 struct intel_texture_image
*intel_image
= intel_texture_image(dst_image
);
707 /* No pixel transfer operations (zoom, bias, mapping), just a blit */
708 if (brw
->ctx
._ImageTransferState
)
711 /* Sync up the state of window system buffers. We need to do this before
712 * we go looking at the src renderbuffer's miptree.
714 intel_prepare_render(brw
);
716 struct intel_mipmap_tree
*src_mt
= src_irb
->mt
;
717 struct intel_mipmap_tree
*dst_mt
= intel_image
->mt
;
719 /* We can't handle any combined depth-stencil formats because we have to
720 * reinterpret as a color format.
722 if (_mesa_get_format_base_format(src_mt
->format
) == GL_DEPTH_STENCIL
||
723 _mesa_get_format_base_format(dst_mt
->format
) == GL_DEPTH_STENCIL
)
726 if (!brw_blorp_supports_dst_format(brw
, dst_image
->TexFormat
))
729 /* Source clipping shouldn't be necessary, since copytexsubimage (in
730 * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
733 * Destination clipping shouldn't be necessary since the restrictions on
734 * glCopyTexSubImage prevent the user from specifying a destination rectangle
735 * that falls outside the bounds of the destination texture.
736 * See error_check_subtexture_dimensions().
739 int srcY1
= srcY0
+ height
;
740 int srcX1
= srcX0
+ width
;
741 int dstX1
= dstX0
+ width
;
742 int dstY1
= dstY0
+ height
;
744 /* Account for the fact that in the system framebuffer, the origin is at
747 bool mirror_y
= ctx
->ReadBuffer
->FlipY
;
749 apply_y_flip(&srcY0
, &srcY1
, src_rb
->Height
);
751 /* Account for face selection and texture view MinLayer */
752 int dst_slice
= slice
+ dst_image
->TexObject
->MinLayer
+ dst_image
->Face
;
753 int dst_level
= dst_image
->Level
+ dst_image
->TexObject
->MinLevel
;
755 brw_blorp_blit_miptrees(brw
,
756 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
757 src_rb
->Format
, blorp_get_texture_swizzle(src_irb
),
758 dst_mt
, dst_level
, dst_slice
,
759 dst_image
->TexFormat
,
760 srcX0
, srcY0
, srcX1
, srcY1
,
761 dstX0
, dstY0
, dstX1
, dstY1
,
762 GL_NEAREST
, false, mirror_y
,
765 /* If we're copying to a packed depth stencil texture and the source
766 * framebuffer has separate stencil, we need to also copy the stencil data
769 src_rb
= ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
770 if (_mesa_get_format_bits(dst_image
->TexFormat
, GL_STENCIL_BITS
) > 0 &&
772 src_irb
= intel_renderbuffer(src_rb
);
773 src_mt
= src_irb
->mt
;
775 if (src_mt
->stencil_mt
)
776 src_mt
= src_mt
->stencil_mt
;
777 if (dst_mt
->stencil_mt
)
778 dst_mt
= dst_mt
->stencil_mt
;
780 if (src_mt
!= dst_mt
) {
781 brw_blorp_blit_miptrees(brw
,
782 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
784 blorp_get_texture_swizzle(src_irb
),
785 dst_mt
, dst_level
, dst_slice
,
787 srcX0
, srcY0
, srcX1
, srcY1
,
788 dstX0
, dstY0
, dstX1
, dstY1
,
789 GL_NEAREST
, false, mirror_y
,
799 brw_blorp_framebuffer(struct brw_context
*brw
,
800 struct gl_framebuffer
*readFb
,
801 struct gl_framebuffer
*drawFb
,
802 GLint srcX0
, GLint srcY0
, GLint srcX1
, GLint srcY1
,
803 GLint dstX0
, GLint dstY0
, GLint dstX1
, GLint dstY1
,
804 GLbitfield mask
, GLenum filter
)
806 static GLbitfield buffer_bits
[] = {
809 GL_STENCIL_BUFFER_BIT
,
812 for (unsigned int i
= 0; i
< ARRAY_SIZE(buffer_bits
); ++i
) {
813 if ((mask
& buffer_bits
[i
]) &&
814 try_blorp_blit(brw
, readFb
, drawFb
,
815 srcX0
, srcY0
, srcX1
, srcY1
,
816 dstX0
, dstY0
, dstX1
, dstY1
,
817 filter
, buffer_bits
[i
])) {
818 mask
&= ~buffer_bits
[i
];
825 static struct brw_bo
*
826 blorp_get_client_bo(struct brw_context
*brw
,
827 unsigned w
, unsigned h
, unsigned d
,
828 GLenum target
, GLenum format
, GLenum type
,
830 const struct gl_pixelstore_attrib
*packing
,
831 uint32_t *offset_out
, uint32_t *row_stride_out
,
832 uint32_t *image_stride_out
, bool read_only
)
834 /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
835 const GLuint dims
= _mesa_get_texture_dimensions(target
);
836 const uint32_t first_pixel
= _mesa_image_offset(dims
, packing
, w
, h
,
837 format
, type
, 0, 0, 0);
838 const uint32_t last_pixel
= _mesa_image_offset(dims
, packing
, w
, h
,
841 const uint32_t stride
= _mesa_image_row_stride(packing
, w
, format
, type
);
842 const uint32_t cpp
= _mesa_bytes_per_pixel(format
, type
);
843 const uint32_t size
= last_pixel
- first_pixel
;
845 *row_stride_out
= stride
;
846 *image_stride_out
= _mesa_image_image_stride(packing
, w
, h
, format
, type
);
848 if (_mesa_is_bufferobj(packing
->BufferObj
)) {
849 const uint32_t offset
= first_pixel
+ (intptr_t)pixels
;
850 if (!read_only
&& ((offset
% cpp
) || (stride
% cpp
))) {
851 perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
855 /* This is a user-provided PBO. We just need to get the BO out */
856 struct intel_buffer_object
*intel_pbo
=
857 intel_buffer_object(packing
->BufferObj
);
859 intel_bufferobj_buffer(brw
, intel_pbo
, offset
, size
, !read_only
);
861 /* We take a reference to the BO so that the caller can just always
862 * unref without having to worry about whether it's a user PBO or one
865 brw_bo_reference(bo
);
867 *offset_out
= offset
;
870 /* Someone should have already checked that there is data to upload. */
873 /* Creating a temp buffer currently only works for upload */
876 /* This is not a user-provided PBO. Instead, pixels is a pointer to CPU
877 * data which we need to copy into a BO.
880 brw_bo_alloc(brw
->bufmgr
, "tmp_tex_subimage_src", size
,
883 perf_debug("intel_texsubimage: temp bo creation failed: size = %u\n",
888 if (brw_bo_subdata(bo
, 0, size
, pixels
+ first_pixel
)) {
889 perf_debug("intel_texsubimage: temp bo upload failed\n");
890 brw_bo_unreference(bo
);
899 /* Consider all the restrictions and determine the format of the source. */
901 blorp_get_client_format(struct brw_context
*brw
,
902 GLenum format
, GLenum type
,
903 const struct gl_pixelstore_attrib
*packing
)
905 if (brw
->ctx
._ImageTransferState
)
906 return MESA_FORMAT_NONE
;
908 if (packing
->SwapBytes
|| packing
->LsbFirst
|| packing
->Invert
) {
909 perf_debug("intel_texsubimage_blorp: unsupported gl_pixelstore_attrib\n");
910 return MESA_FORMAT_NONE
;
913 if (format
!= GL_RED
&&
919 format
!= GL_ALPHA
&&
920 format
!= GL_RED_INTEGER
&&
921 format
!= GL_RG_INTEGER
&&
922 format
!= GL_RGB_INTEGER
&&
923 format
!= GL_BGR_INTEGER
&&
924 format
!= GL_RGBA_INTEGER
&&
925 format
!= GL_BGRA_INTEGER
) {
926 perf_debug("intel_texsubimage_blorp: %s not supported",
927 _mesa_enum_to_string(format
));
928 return MESA_FORMAT_NONE
;
931 return _mesa_tex_format_from_format_and_type(&brw
->ctx
, format
, type
);
935 need_signed_unsigned_int_conversion(mesa_format src_format
,
936 mesa_format dst_format
)
938 const GLenum src_type
= _mesa_get_format_datatype(src_format
);
939 const GLenum dst_type
= _mesa_get_format_datatype(dst_format
);
940 return (src_type
== GL_INT
&& dst_type
== GL_UNSIGNED_INT
) ||
941 (src_type
== GL_UNSIGNED_INT
&& dst_type
== GL_INT
);
/* Upload client pixel data (a PBO path) into a destination miptree using
 * BLORP, one 2D slice at a time.
 *
 * NOTE(review): this function's text is mangled -- statements are wrapped
 * mid-expression and several original lines appear to be missing (e.g. the
 * return type, the "pixels" parameter around original line 951, early
 * "return false" statements, and closing braces).  Restore from the upstream
 * source before building; the comments below describe only the visible
 * logic.
 */
945 brw_blorp_upload_miptree(struct brw_context
*brw
,
946 struct intel_mipmap_tree
*dst_mt
,
947 mesa_format dst_format
,
948 uint32_t level
, uint32_t x
, uint32_t y
, uint32_t z
,
949 uint32_t width
, uint32_t height
, uint32_t depth
,
950 GLenum target
, GLenum format
, GLenum type
,
952 const struct gl_pixelstore_attrib
*packing
)
/* Resolve the client-side (source) format from the GL format/type/packing;
 * MESA_FORMAT_NONE means the combination is unsupported by this path.
 */
954 const mesa_format src_format
=
955 blorp_get_client_format(brw
, format
, type
, packing
);
956 if (src_format
== MESA_FORMAT_NONE
)
/* The destination must be renderable, since BLORP renders into it. */
959 if (!brw
->mesa_format_supports_render
[dst_format
]) {
960 perf_debug("intel_texsubimage: can't use %s as render target\n",
961 _mesa_get_format_name(dst_format
));
965 /* This function relies on blorp_blit to upload the pixel data to the
966 * miptree. But, blorp_blit doesn't support signed to unsigned or
967 * unsigned to signed integer conversions.
969 if (need_signed_unsigned_int_conversion(src_format
, dst_format
))
/* Wrap the client data in a BO; blorp_get_client_bo returns the start
 * offset and row/image strides for addressing individual slices.
 */
972 uint32_t src_offset
, src_row_stride
, src_image_stride
;
973 struct brw_bo
*src_bo
=
974 blorp_get_client_bo(brw
, width
, height
, depth
,
975 target
, format
, type
, pixels
, packing
,
976 &src_offset
, &src_row_stride
,
977 &src_image_stride
, true);
981 /* Now that source is offset to correct starting point, adjust the
982 * given dimensions to treat 1D arrays as 2D.
984 if (target
== GL_TEXTURE_1D_ARRAY
) {
991 src_image_stride
= src_row_stride
;
/* Validate the requested level/layer range against the destination. */
994 intel_miptree_check_level_layer(dst_mt
, level
, z
+ depth
- 1);
998 /* Blit slice-by-slice creating a single-slice miptree for each layer. Even
999 * in case of linear buffers hardware wants image arrays to be aligned by
1000 * four rows. This way hardware only gets one image at a time and any
1001 * source alignment will do.
1003 for (unsigned i
= 0; i
< depth
; ++i
) {
/* Wrap slice i of the client BO as a linear single-slice miptree. */
1004 struct intel_mipmap_tree
*src_mt
= intel_miptree_create_for_bo(
1005 brw
, src_bo
, src_format
,
1006 src_offset
+ i
* src_image_stride
,
1009 ISL_TILING_LINEAR
, 0);
1012 perf_debug("intel_texsubimage: miptree creation for src failed\n");
1016 /* In case exact match is needed, copy using equivalent UINT formats
1017 * preventing hardware from changing presentation for SNORM -1.
1019 if (src_mt
->format
== dst_format
) {
/* Exact format match: raw copy preserves the bit pattern. */
1020 brw_blorp_copy_miptrees(brw
, src_mt
, 0, 0,
1021 dst_mt
, level
, z
+ i
,
1022 0, 0, x
, y
, width
, height
);
/* Otherwise a format-converting blit (no scaling: 1:1 rectangle). */
1024 brw_blorp_blit_miptrees(brw
, src_mt
, 0, 0,
1025 src_format
, SWIZZLE_XYZW
,
1026 dst_mt
, level
, z
+ i
,
1028 0, 0, width
, height
,
1029 x
, y
, x
+ width
, y
+ height
,
1030 GL_NEAREST
, false, false, false, false);
/* Drop the temporary per-slice source miptree. */
1033 intel_miptree_release(&src_mt
);
/* Release the wrapper BO around the client data. */
1039 brw_bo_unreference(src_bo
);
/* Download pixel data from a source miptree into a client buffer via BLORP,
 * one 2D slice at a time.  Asserts that the destination is a PBO.
 *
 * NOTE(review): the text is mangled and several original lines appear to be
 * missing (return type, early "return false" statements, some switch cases
 * around original lines 1076-1083, the declaration of y0, closing braces).
 * Restore from the upstream source before building; comments describe only
 * the visible logic.
 */
1045 brw_blorp_download_miptree(struct brw_context
*brw
,
1046 struct intel_mipmap_tree
*src_mt
,
1047 mesa_format src_format
, uint32_t src_swizzle
,
1048 uint32_t level
, uint32_t x
, uint32_t y
, uint32_t z
,
1049 uint32_t width
, uint32_t height
, uint32_t depth
,
1050 GLenum target
, GLenum format
, GLenum type
,
1051 bool y_flip
, const void *pixels
,
1052 const struct gl_pixelstore_attrib
*packing
)
/* Resolve the client-side (destination) format; MESA_FORMAT_NONE means the
 * format/type/packing combination is unsupported by this path.
 */
1054 const mesa_format dst_format
=
1055 blorp_get_client_format(brw
, format
, type
, packing
);
1056 if (dst_format
== MESA_FORMAT_NONE
)
/* BLORP renders into the client BO, so the client format must be
 * renderable.
 */
1059 if (!brw
->mesa_format_supports_render
[dst_format
]) {
1060 perf_debug("intel_texsubimage: can't use %s as render target\n",
1061 _mesa_get_format_name(dst_format
));
1065 /* This function relies on blorp_blit to download the pixel data from the
1066 * miptree. But, blorp_blit doesn't support signed to unsigned or unsigned
1067 * to signed integer conversions.
1069 if (need_signed_unsigned_int_conversion(src_format
, dst_format
))
1072 /* We can't fetch from LUMINANCE or intensity as that would require a
1073 * non-trivial swizzle.
1075 switch (_mesa_get_format_base_format(src_format
)) {
1077 case GL_LUMINANCE_ALPHA
:
1084 /* This pass only works for PBOs */
1085 assert(_mesa_is_bufferobj(packing
->BufferObj
));
/* Wrap the client PBO; offsets/strides address individual slices. */
1087 uint32_t dst_offset
, dst_row_stride
, dst_image_stride
;
1088 struct brw_bo
*dst_bo
=
1089 blorp_get_client_bo(brw
, width
, height
, depth
,
1090 target
, format
, type
, pixels
, packing
,
1091 &dst_offset
, &dst_row_stride
,
1092 &dst_image_stride
, false);
1096 /* Now that source is offset to correct starting point, adjust the
1097 * given dimensions to treat 1D arrays as 2D.
1099 if (target
== GL_TEXTURE_1D_ARRAY
) {
1106 dst_image_stride
= dst_row_stride
;
/* Validate the requested level/layer range against the source. */
1109 intel_miptree_check_level_layer(src_mt
, level
, z
+ depth
- 1);
/* Compute the source Y window; apply_y_flip mirrors it within the
 * physical height of the selected mip level when y_flip is requested.
 * NOTE(review): the declaration/initialization of y0 is among the
 * missing lines.
 */
1112 int y1
= y
+ height
;
1114 apply_y_flip(&y0
, &y1
, minify(src_mt
->surf
.phys_level0_sa
.height
,
1115 level
- src_mt
->first_level
));
1118 bool result
= false;
1120 /* Blit slice-by-slice creating a single-slice miptree for each layer. Even
1121 * in case of linear buffers hardware wants image arrays to be aligned by
1122 * four rows. This way hardware only gets one image at a time and any
1123 * source alignment will do.
1125 for (unsigned i
= 0; i
< depth
; ++i
) {
/* Wrap slice i of the client BO as a linear single-slice miptree. */
1126 struct intel_mipmap_tree
*dst_mt
= intel_miptree_create_for_bo(
1127 brw
, dst_bo
, dst_format
,
1128 dst_offset
+ i
* dst_image_stride
,
1131 ISL_TILING_LINEAR
, 0);
1134 perf_debug("intel_texsubimage: miptree creation for src failed\n");
1138 /* In case exact match is needed, copy using equivalent UINT formats
1139 * preventing hardware from changing presentation for SNORM -1.
1141 if (dst_mt
->format
== src_format
&& !y_flip
&&
1142 src_swizzle
== SWIZZLE_XYZW
) {
/* Exact format, no flip, identity swizzle: raw copy. */
1143 brw_blorp_copy_miptrees(brw
, src_mt
, level
, z
+ i
,
1145 x
, y
, 0, 0, width
, height
);
/* Otherwise a converting blit that can also flip and swizzle. */
1147 brw_blorp_blit_miptrees(brw
, src_mt
, level
, z
+ i
,
1148 src_format
, src_swizzle
,
1149 dst_mt
, 0, 0, dst_format
,
1150 x
, y0
, x
+ width
, y1
,
1151 0, 0, width
, height
,
1152 GL_NEAREST
, false, y_flip
, false, false);
/* Drop the temporary per-slice destination miptree. */
1155 intel_miptree_release(&dst_mt
);
1160 /* As we implement PBO transfers by binding the user-provided BO as a
1161 * fake framebuffer and rendering to it. This breaks the invariant of the
1162 * GL that nothing is able to render to a BO, causing nondeterministic
1163 * corruption issues because the render cache is not coherent with a
1164 * number of other caches that the BO could potentially be bound to
1167 * This could be solved in the same way that we guarantee texture
1168 * coherency after a texture is attached to a framebuffer and
1169 * rendered to, but that would involve checking *all* BOs bound to
1170 * the pipeline for the case we need to emit a cache flush due to
1171 * previous rendering to any of them -- Including vertex, index,
1172 * uniform, atomic counter, shader image, transform feedback,
1173 * indirect draw buffers, etc.
1175 * That would increase the per-draw call overhead even though it's
1176 * very unlikely that any of the BOs bound to the pipeline has been
1177 * rendered to via a PBO at any point, so it seems better to just
1178 * flush here unconditionally.
1180 brw_emit_mi_flush(brw
);
/* Release the wrapper BO around the client data. */
1183 brw_bo_unreference(dst_bo
);
/* Translate a per-channel GL color mask into BLORP's per-channel
 * write-disable array: color_write_disable[i] is true when channel i is
 * masked off.  "disables" records whether any used channel is disabled.
 *
 * NOTE(review): text is mangled; the function's return type and its final
 * "return" line (presumably returning "disables" -- the caller at original
 * line 1242 tests the result) are among the missing lines.  Confirm against
 * the upstream source.
 */
1189 set_write_disables(const struct intel_renderbuffer
*irb
,
1190 const unsigned color_mask
, bool *color_write_disable
)
1192 /* Format information in the renderbuffer represents the requirements
1193 * given by the client. There are cases where the backing miptree uses,
1194 * for example, RGBA to represent RGBX. Since the client is only expecting
1195 * RGB we can treat alpha as not used and write whatever we like into it.
1197 const GLenum base_format
= irb
->Base
.Base
._BaseFormat
;
1198 const int components
= _mesa_components_in_format(base_format
);
1199 bool disables
= false;
1201 assert(components
> 0);
/* Only iterate over the channels the client's base format actually uses. */
1203 for (int i
= 0; i
< components
; i
++) {
1204 color_write_disable
[i
] = !(color_mask
& (1 << i
));
1205 disables
= disables
|| color_write_disable
[i
];
/* Clear a single color renderbuffer with BLORP, taking the fast-clear
 * (CCS/MCS) path when a long list of preconditions holds, otherwise a slow
 * (full-color) BLORP clear.
 *
 * NOTE(review): text is mangled; missing lines include the return type,
 * the initialization of x0/x1 (only the y0/y1 computations survive),
 * several closing braces, early returns, and at least one argument line of
 * blorp_fast_clear (original line 1327).  Restore from the upstream source
 * before building; comments describe only the visible logic.
 */
1212 do_single_blorp_clear(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
1213 struct gl_renderbuffer
*rb
, unsigned buf
,
1214 bool partial_clear
, bool encode_srgb
)
1216 struct gl_context
*ctx
= &brw
->ctx
;
1217 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1218 uint32_t x0
, x1
, y0
, y1
;
/* When sRGB encoding is not wanted, clear through the linear equivalent
 * of an sRGB format.
 */
1220 mesa_format format
= irb
->Base
.Base
.Format
;
1221 if (!encode_srgb
&& _mesa_get_format_color_encoding(format
) == GL_SRGB
)
1222 format
= _mesa_get_srgb_format_linear(format
);
1223 enum isl_format isl_format
= brw
->mesa_to_isl_render_format
[format
];
/* Convert the framebuffer's Y window into this renderbuffer's
 * top-down coordinate space.
 */
1228 y0
= rb
->Height
- fb
->_Ymax
;
1229 y1
= rb
->Height
- fb
->_Ymin
;
1235 /* If the clear region is empty, just return. */
1236 if (x0
== x1
|| y0
== y1
)
/* Fast clear is only possible for a full (non-scissored) clear... */
1239 bool can_fast_clear
= !partial_clear
;
/* ...with no color channels write-masked... */
1241 bool color_write_disable
[4] = { false, false, false, false };
1242 if (set_write_disables(irb
, GET_COLORMASK(ctx
->Color
.ColorMask
, buf
),
1243 color_write_disable
))
1244 can_fast_clear
= false;
1246 /* We store clear colors as floats or uints as needed. If there are
1247 * texture views in play, the formats will not properly be respected
1248 * during resolves because the resolve operations only know about the
1249 * miptree and not the renderbuffer.
1251 if (irb
->Base
.Base
.Format
!= irb
->mt
->format
)
1252 can_fast_clear
= false;
/* ...on a miptree that supports fast clear with a compatible color... */
1254 if (!irb
->mt
->supports_fast_clear
||
1255 !brw_is_color_fast_clear_compatible(brw
, irb
->mt
, &ctx
->Color
.ClearColor
))
1256 can_fast_clear
= false;
1258 /* Surface state can only record one fast clear color value. Therefore
1259 * unless different levels/layers agree on the color it can be used to
1260 * represent only single level/layer. Here it will be reserved for the
1261 * first slice (level 0, layer 0).
1263 if (irb
->layer_count
> 1 || irb
->mt_level
|| irb
->mt_layer
)
1264 can_fast_clear
= false;
1266 unsigned level
= irb
->mt_level
;
1267 const unsigned num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
1269 /* If the MCS buffer hasn't been allocated yet, we need to allocate it now.
1271 if (can_fast_clear
&& !irb
->mt
->aux_buf
) {
1272 assert(irb
->mt
->aux_usage
== ISL_AUX_USAGE_CCS_D
);
1273 if (!intel_miptree_alloc_aux(brw
, irb
->mt
)) {
1274 /* We're out of memory. Fall back to a non-fast clear. */
1275 can_fast_clear
= false;
1279 /* FINISHME: Debug and enable fast clears */
1280 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1281 if (devinfo
->gen
>= 11)
1282 can_fast_clear
= false;
/* ---- Fast-clear path ---- */
1284 if (can_fast_clear
) {
1285 const enum isl_aux_state aux_state
=
1286 intel_miptree_get_aux_state(irb
->mt
, irb
->mt_level
, irb
->mt_layer
);
1287 union isl_color_value clear_color
=
1288 brw_meta_convert_fast_clear_color(brw
, irb
->mt
,
1289 &ctx
->Color
.ClearColor
);
1291 intel_miptree_set_clear_color(brw
, irb
->mt
, clear_color
);
1293 /* If the buffer is already in ISL_AUX_STATE_CLEAR, the clear
1294 * is redundant and can be skipped.
1296 if (aux_state
== ISL_AUX_STATE_CLEAR
)
1299 DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__
,
1300 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
1302 /* We can't setup the blorp_surf until we've allocated the MCS above */
1303 struct isl_surf isl_tmp
[2];
1304 struct blorp_surf surf
;
1305 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, irb
->mt
->aux_usage
, true,
1306 &level
, irb
->mt_layer
, num_layers
, isl_tmp
);
1308 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
1310 * "Any transition from any value in {Clear, Render, Resolve} to a
1311 * different value in {Clear, Render, Resolve} requires end of pipe
1314 * In other words, fast clear ops are not properly synchronized with
1315 * other drawing. We need to use a PIPE_CONTROL to ensure that the
1316 * contents of the previous draw hit the render target before we resolve
1317 * and again afterwards to ensure that the resolve is complete before we
1318 * do any more regular drawing.
1320 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
1322 struct blorp_batch batch
;
1323 blorp_batch_init(&brw
->blorp
, &batch
, brw
,
1324 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR
);
1325 blorp_fast_clear(&batch
, &surf
, isl_format
,
1326 level
, irb
->mt_layer
, num_layers
,
1328 blorp_batch_finish(&batch
);
/* Second end-of-pipe sync: make sure the fast clear lands before any
 * subsequent regular rendering (see PRM quote above).
 */
1330 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
1332 /* Now that the fast clear has occurred, put the buffer in
1333 * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
1336 intel_miptree_set_aux_state(brw
, irb
->mt
, irb
->mt_level
,
1337 irb
->mt_layer
, num_layers
,
1338 ISL_AUX_STATE_CLEAR
);
/* ---- Slow-clear path ---- */
1340 DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__
,
1341 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
1343 enum isl_aux_usage aux_usage
=
1344 intel_miptree_render_aux_usage(brw
, irb
->mt
, isl_format
,
1346 intel_miptree_prepare_render(brw
, irb
->mt
, level
, irb
->mt_layer
,
1347 num_layers
, aux_usage
);
1349 struct isl_surf isl_tmp
[2];
1350 struct blorp_surf surf
;
1351 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, aux_usage
, true,
1352 &level
, irb
->mt_layer
, num_layers
, isl_tmp
);
/* The slow path passes the raw float clear color through directly. */
1354 union isl_color_value clear_color
;
1355 memcpy(clear_color
.f32
, ctx
->Color
.ClearColor
.f
, sizeof(float) * 4);
1357 struct blorp_batch batch
;
1358 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1359 blorp_clear(&batch
, &surf
, isl_format
, ISL_SWIZZLE_IDENTITY
,
1360 level
, irb
->mt_layer
, num_layers
,
1362 clear_color
, color_write_disable
);
1363 blorp_batch_finish(&batch
);
1365 intel_miptree_finish_render(brw
, irb
->mt
, level
, irb
->mt_layer
,
1366 num_layers
, aux_usage
);
/* Clear every color draw buffer selected by "mask" via
 * do_single_blorp_clear, then flag each cleared renderbuffer for
 * downsampling.
 *
 * NOTE(review): text is mangled; missing lines include the return type,
 * the "continue" statements for the skip cases, and closing braces.
 * Restore from the upstream source before building.
 */
1373 brw_blorp_clear_color(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
1374 GLbitfield mask
, bool partial_clear
, bool encode_srgb
)
1376 for (unsigned buf
= 0; buf
< fb
->_NumColorDrawBuffers
; buf
++) {
1377 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[buf
];
1378 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
1380 /* Only clear the buffers present in the provided mask */
1381 if (((1 << fb
->_ColorDrawBufferIndexes
[buf
]) & mask
) == 0)
1384 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
1385 * the framebuffer can be complete with some attachments missing. In
1386 * this case the _ColorDrawBuffers pointer will be NULL.
1391 do_single_blorp_clear(brw
, fb
, rb
, buf
, partial_clear
, encode_srgb
);
/* Mark multisampled buffers as needing a downsample before use. */
1392 irb
->need_downsample
= true;
/* Clear the depth and/or stencil attachments selected by "mask" with a
 * single blorp_clear_depth_stencil call, after dropping bits whose buffer
 * is absent or fully write-masked.
 *
 * NOTE(review): text is mangled; missing lines include the return type,
 * early returns, x0/x1 initialization (only the y window survives), the
 * final isl_tmp arguments of the blorp_surf_for_miptree calls, and several
 * braces.  Restore from the upstream source before building.
 */
1399 brw_blorp_clear_depth_stencil(struct brw_context
*brw
,
1400 struct gl_framebuffer
*fb
,
1401 GLbitfield mask
, bool partial_clear
)
1403 const struct gl_context
*ctx
= &brw
->ctx
;
1404 struct gl_renderbuffer
*depth_rb
=
1405 fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
;
1406 struct gl_renderbuffer
*stencil_rb
=
1407 fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
/* Drop the depth bit when there is no depth buffer or depth writes are
 * disabled; drop the stencil bit when there is no stencil buffer or the
 * low 8 bits of the stencil write mask are all zero.
 */
1409 if (!depth_rb
|| ctx
->Depth
.Mask
== GL_FALSE
)
1410 mask
&= ~BUFFER_BIT_DEPTH
;
1412 if (!stencil_rb
|| (ctx
->Stencil
.WriteMask
[0] & 0xff) == 0)
1413 mask
&= ~BUFFER_BIT_STENCIL
;
1415 if (!(mask
& (BUFFER_BITS_DEPTH_STENCIL
)))
/* Compute the clear window in buffer coordinates (Y is flipped
 * relative to the framebuffer's _Ymin/_Ymax).
 */
1418 uint32_t x0
, x1
, y0
, y1
, rb_height
;
1420 rb_height
= depth_rb
->Height
;
/* When both buffers are cleared they must have matching dimensions. */
1422 assert(depth_rb
->Width
== stencil_rb
->Width
);
1423 assert(depth_rb
->Height
== stencil_rb
->Height
);
1427 rb_height
= stencil_rb
->Height
;
1433 y0
= rb_height
- fb
->_Ymax
;
1434 y1
= rb_height
- fb
->_Ymin
;
1440 /* If the clear region is empty, just return. */
1441 if (x0
== x1
|| y0
== y1
)
1444 uint32_t level
, start_layer
, num_layers
;
1445 struct isl_surf isl_tmp
[4];
1446 struct blorp_surf depth_surf
, stencil_surf
;
/* Set up the depth surface (if the depth bit survived). */
1448 struct intel_mipmap_tree
*depth_mt
= NULL
;
1449 if (mask
& BUFFER_BIT_DEPTH
) {
1450 struct intel_renderbuffer
*irb
= intel_renderbuffer(depth_rb
);
1451 depth_mt
= find_miptree(GL_DEPTH_BUFFER_BIT
, irb
);
1453 level
= irb
->mt_level
;
1454 start_layer
= irb
->mt_layer
;
1455 num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
1457 intel_miptree_prepare_depth(brw
, depth_mt
, level
,
1458 start_layer
, num_layers
);
/* blorp_surf_for_miptree may rebase the level; assert it did not. */
1460 unsigned depth_level
= level
;
1461 blorp_surf_for_miptree(brw
, &depth_surf
, depth_mt
, depth_mt
->aux_usage
,
1462 true, &depth_level
, start_layer
, num_layers
,
1464 assert(depth_level
== level
);
/* Set up the stencil surface (if the stencil bit survived). */
1467 uint8_t stencil_mask
= 0;
1468 struct intel_mipmap_tree
*stencil_mt
= NULL
;
1469 if (mask
& BUFFER_BIT_STENCIL
) {
1470 struct intel_renderbuffer
*irb
= intel_renderbuffer(stencil_rb
);
1471 stencil_mt
= find_miptree(GL_STENCIL_BUFFER_BIT
, irb
);
/* When depth is also being cleared, both attachments must agree on
 * level/layer range; otherwise stencil defines them.
 */
1473 if (mask
& BUFFER_BIT_DEPTH
) {
1474 assert(level
== irb
->mt_level
);
1475 assert(start_layer
== irb
->mt_layer
);
1476 assert(num_layers
== fb
->MaxNumLayers
? irb
->layer_count
: 1);
1479 level
= irb
->mt_level
;
1480 start_layer
= irb
->mt_layer
;
1481 num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
1483 stencil_mask
= ctx
->Stencil
.WriteMask
[0] & 0xff;
1485 intel_miptree_prepare_access(brw
, stencil_mt
, level
, 1,
1486 start_layer
, num_layers
,
1487 ISL_AUX_USAGE_NONE
, false);
1489 unsigned stencil_level
= level
;
1490 blorp_surf_for_miptree(brw
, &stencil_surf
, stencil_mt
,
1491 ISL_AUX_USAGE_NONE
, true,
1492 &stencil_level
, start_layer
, num_layers
,
/* At least one of the two clears must actually happen. */
1496 assert((mask
& BUFFER_BIT_DEPTH
) || stencil_mask
);
1498 struct blorp_batch batch
;
1499 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1500 blorp_clear_depth_stencil(&batch
, &depth_surf
, &stencil_surf
,
1501 level
, start_layer
, num_layers
,
1503 (mask
& BUFFER_BIT_DEPTH
), ctx
->Depth
.Clear
,
1504 stencil_mask
, ctx
->Stencil
.Clear
);
1505 blorp_batch_finish(&batch
);
/* Let the miptree code track that depth/stencil were just written. */
1507 if (mask
& BUFFER_BIT_DEPTH
) {
1508 intel_miptree_finish_depth(brw
, depth_mt
, level
,
1509 start_layer
, num_layers
, true);
1513 intel_miptree_finish_write(brw
, stencil_mt
, level
,
1514 start_layer
, num_layers
,
1515 ISL_AUX_USAGE_NONE
);
/* Run a CCS (color compression) resolve on one level/layer of a miptree,
 * bracketed by end-of-pipe syncs as the Ivy Bridge PRM requires for
 * transitions between Clear/Render/Resolve.
 *
 * NOTE(review): text is mangled; missing lines include the return type,
 * the trailing isl_tmp argument of blorp_surf_for_miptree, and the final
 * resolve_op argument of blorp_ccs_resolve (resolve_op is otherwise
 * unused in the visible text).  Restore from the upstream source.
 */
1520 brw_blorp_resolve_color(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
1521 unsigned level
, unsigned layer
,
1522 enum isl_aux_op resolve_op
)
1524 DBG("%s to mt %p level %u layer %u\n", __FUNCTION__
, mt
, level
, layer
);
/* Resolve through the linear equivalent of an sRGB format. */
1526 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
1528 struct isl_surf isl_tmp
[1];
1529 struct blorp_surf surf
;
1530 blorp_surf_for_miptree(brw
, &surf
, mt
, mt
->aux_usage
, true,
1531 &level
, layer
, 1 /* num_layers */,
1534 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
1536 * "Any transition from any value in {Clear, Render, Resolve} to a
1537 * different value in {Clear, Render, Resolve} requires end of pipe
1540 * In other words, fast clear ops are not properly synchronized with
1541 * other drawing. We need to use a PIPE_CONTROL to ensure that the
1542 * contents of the previous draw hit the render target before we resolve
1543 * and again afterwards to ensure that the resolve is complete before we
1544 * do any more regular drawing.
1546 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
1549 struct blorp_batch batch
;
1550 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1551 blorp_ccs_resolve(&batch
, &surf
, level
, layer
, 1,
1552 brw_blorp_to_isl_format(brw
, format
, true),
1554 blorp_batch_finish(&batch
);
1556 /* See comment above */
1557 brw_emit_end_of_pipe_sync(brw
, PIPE_CONTROL_RENDER_TARGET_FLUSH
);
/* Run an MCS partial resolve on a range of layers of a multisampled
 * miptree.  The miptree must actually be using MCS auxiliary data.
 *
 * NOTE(review): text is mangled; missing lines include the return type and
 * the declaration/initialization of "level" (used at original line 1577,
 * presumably initialized to 0 for a single-level MSAA surface -- confirm
 * against the upstream source).
 */
1561 brw_blorp_mcs_partial_resolve(struct brw_context
*brw
,
1562 struct intel_mipmap_tree
*mt
,
1563 uint32_t start_layer
, uint32_t num_layers
)
1565 DBG("%s to mt %p layers %u-%u\n", __FUNCTION__
, mt
,
1566 start_layer
, start_layer
+ num_layers
- 1);
1568 assert(mt
->aux_usage
== ISL_AUX_USAGE_MCS
);
/* Resolve through the linear equivalent of an sRGB format. */
1570 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
1571 enum isl_format isl_format
= brw_blorp_to_isl_format(brw
, format
, true);
1573 struct isl_surf isl_tmp
[1];
1574 struct blorp_surf surf
;
1576 blorp_surf_for_miptree(brw
, &surf
, mt
, ISL_AUX_USAGE_MCS
, true,
1577 &level
, start_layer
, num_layers
, isl_tmp
);
1579 struct blorp_batch batch
;
1580 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1581 blorp_mcs_partial_resolve(&batch
, &surf
, isl_format
,
1582 start_layer
, num_layers
);
1583 blorp_batch_finish(&batch
);
1587 * Perform a HiZ or depth resolve operation.
1589 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
1590 * PRM, Volume 1, Part 2:
1591 * - 7.5.3.1 Depth Buffer Clear
1592 * - 7.5.3.2 Depth Buffer Resolve
1593 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
1596 intel_hiz_exec(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
1597 unsigned int level
, unsigned int start_layer
,
1598 unsigned int num_layers
, enum isl_aux_op op
)
1600 assert(intel_miptree_level_has_hiz(mt
, level
));
1601 assert(op
!= ISL_AUX_OP_NONE
);
1602 const struct gen_device_info
*devinfo
= &brw
->screen
->devinfo
;
1603 const char *opname
= NULL
;
1606 case ISL_AUX_OP_FULL_RESOLVE
:
1607 opname
= "depth resolve";
1609 case ISL_AUX_OP_AMBIGUATE
:
1610 opname
= "hiz ambiguate";
1612 case ISL_AUX_OP_FAST_CLEAR
:
1613 opname
= "depth clear";
1615 case ISL_AUX_OP_PARTIAL_RESOLVE
:
1616 case ISL_AUX_OP_NONE
:
1617 unreachable("Invalid HiZ op");
1620 DBG("%s %s to mt %p level %d layers %d-%d\n",
1621 __func__
, opname
, mt
, level
, start_layer
, start_layer
+ num_layers
- 1);
1623 /* The following stalls and flushes are only documented to be required for
1624 * HiZ clear operations. However, they also seem to be required for
1625 * resolve operations.
1627 if (devinfo
->gen
== 6) {
1628 /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
1630 * "If other rendering operations have preceded this clear, a
1631 * PIPE_CONTROL with write cache flush enabled and Z-inhibit
1632 * disabled must be issued before the rectangle primitive used for
1633 * the depth buffer clear operation.
1635 brw_emit_pipe_control_flush(brw
,
1636 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
1637 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1638 PIPE_CONTROL_CS_STALL
);
1639 } else if (devinfo
->gen
>= 7) {
1641 * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
1643 * If other rendering operations have preceded this clear, a
1644 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1645 * enabled must be issued before the rectangle primitive used for
1646 * the depth buffer clear operation.
1648 * Same applies for Gen8 and Gen9.
1650 * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
1651 * PIPE_CONTROL, Depth Cache Flush Enable:
1653 * This bit must not be set when Depth Stall Enable bit is set in
1656 * This is confirmed to hold for real, HSW gets immediate gpu hangs.
1658 * Therefore issue two pipe control flushes, one for cache flush and
1659 * another for depth stall.
1661 brw_emit_pipe_control_flush(brw
,
1662 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1663 PIPE_CONTROL_CS_STALL
);
1665 brw_emit_pipe_control_flush(brw
, PIPE_CONTROL_DEPTH_STALL
);
1668 assert(mt
->aux_usage
== ISL_AUX_USAGE_HIZ
&& mt
->aux_buf
);
1670 struct isl_surf isl_tmp
[2];
1671 struct blorp_surf surf
;
1672 blorp_surf_for_miptree(brw
, &surf
, mt
, ISL_AUX_USAGE_HIZ
, true,
1673 &level
, start_layer
, num_layers
, isl_tmp
);
1675 struct blorp_batch batch
;
1676 blorp_batch_init(&brw
->blorp
, &batch
, brw
,
1677 BLORP_BATCH_NO_UPDATE_CLEAR_COLOR
);
1678 blorp_hiz_op(&batch
, &surf
, level
, start_layer
, num_layers
, op
);
1679 blorp_batch_finish(&batch
);
1681 /* The following stalls and flushes are only documented to be required for
1682 * HiZ clear operations. However, they also seem to be required for
1683 * resolve operations.
1685 if (devinfo
->gen
== 6) {
1686 /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
1688 * "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
1689 * followed by a PIPE_CONTROL command with DEPTH_STALL bit set
1690 * and Then followed by Depth FLUSH'
1692 brw_emit_pipe_control_flush(brw
,
1693 PIPE_CONTROL_DEPTH_STALL
);
1695 brw_emit_pipe_control_flush(brw
,
1696 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1697 PIPE_CONTROL_CS_STALL
);
1698 } else if (devinfo
->gen
>= 8) {
1700 * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
1702 * "Depth buffer clear pass using any of the methods (WM_STATE,
1703 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
1704 * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
1705 * "set" before starting to render. DepthStall and DepthFlush are
1706 * not needed between consecutive depth clear passes nor is it
1707 * required if the depth clear pass was done with
1708 * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
1710 * TODO: Such as the spec says, this could be conditional.
1712 brw_emit_pipe_control_flush(brw
,
1713 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1714 PIPE_CONTROL_DEPTH_STALL
);