/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "main/context.h"
25 #include "main/teximage.h"
26 #include "main/blend.h"
27 #include "main/fbobject.h"
28 #include "main/renderbuffer.h"
29 #include "main/glformats.h"
31 #include "brw_blorp.h"
32 #include "brw_context.h"
33 #include "brw_defines.h"
34 #include "brw_meta_util.h"
35 #include "brw_state.h"
36 #include "intel_fbo.h"
37 #include "common/gen_debug.h"
39 #define FILE_DEBUG_FLAG DEBUG_BLORP
42 brw_blorp_lookup_shader(struct blorp_context
*blorp
,
43 const void *key
, uint32_t key_size
,
44 uint32_t *kernel_out
, void *prog_data_out
)
46 struct brw_context
*brw
= blorp
->driver_ctx
;
47 return brw_search_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
,
48 key
, key_size
, kernel_out
, prog_data_out
);
52 brw_blorp_upload_shader(struct blorp_context
*blorp
,
53 const void *key
, uint32_t key_size
,
54 const void *kernel
, uint32_t kernel_size
,
55 const struct brw_stage_prog_data
*prog_data
,
56 uint32_t prog_data_size
,
57 uint32_t *kernel_out
, void *prog_data_out
)
59 struct brw_context
*brw
= blorp
->driver_ctx
;
60 brw_upload_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
, key
, key_size
,
61 kernel
, kernel_size
, prog_data
, prog_data_size
,
62 kernel_out
, prog_data_out
);
67 brw_blorp_init(struct brw_context
*brw
)
69 blorp_init(&brw
->blorp
, brw
, &brw
->isl_dev
);
71 brw
->blorp
.compiler
= brw
->screen
->compiler
;
76 brw
->blorp
.exec
= gen45_blorp_exec
;
78 brw
->blorp
.exec
= gen4_blorp_exec
;
82 brw
->blorp
.exec
= gen5_blorp_exec
;
85 brw
->blorp
.mocs
.tex
= 0;
86 brw
->blorp
.mocs
.rb
= 0;
87 brw
->blorp
.mocs
.vb
= 0;
88 brw
->blorp
.exec
= gen6_blorp_exec
;
91 brw
->blorp
.mocs
.tex
= GEN7_MOCS_L3
;
92 brw
->blorp
.mocs
.rb
= GEN7_MOCS_L3
;
93 brw
->blorp
.mocs
.vb
= GEN7_MOCS_L3
;
94 if (brw
->is_haswell
) {
95 brw
->blorp
.exec
= gen75_blorp_exec
;
97 brw
->blorp
.exec
= gen7_blorp_exec
;
101 brw
->blorp
.mocs
.tex
= BDW_MOCS_WB
;
102 brw
->blorp
.mocs
.rb
= BDW_MOCS_PTE
;
103 brw
->blorp
.mocs
.vb
= BDW_MOCS_WB
;
104 brw
->blorp
.exec
= gen8_blorp_exec
;
107 brw
->blorp
.mocs
.tex
= SKL_MOCS_WB
;
108 brw
->blorp
.mocs
.rb
= SKL_MOCS_PTE
;
109 brw
->blorp
.mocs
.vb
= SKL_MOCS_WB
;
110 brw
->blorp
.exec
= gen9_blorp_exec
;
113 unreachable("Invalid gen");
116 brw
->blorp
.lookup_shader
= brw_blorp_lookup_shader
;
117 brw
->blorp
.upload_shader
= brw_blorp_upload_shader
;
121 blorp_surf_for_miptree(struct brw_context
*brw
,
122 struct blorp_surf
*surf
,
123 struct intel_mipmap_tree
*mt
,
124 bool is_render_target
,
126 uint32_t safe_aux_usage
,
128 unsigned start_layer
, unsigned num_layers
,
129 struct isl_surf tmp_surfs
[2])
131 if (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
132 mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) {
133 const unsigned num_samples
= MAX2(1, mt
->num_samples
);
134 for (unsigned i
= 0; i
< num_layers
; i
++) {
135 for (unsigned s
= 0; s
< num_samples
; s
++) {
136 const unsigned phys_layer
= (start_layer
+ i
) * num_samples
+ s
;
137 intel_miptree_check_level_layer(mt
, *level
, phys_layer
);
141 for (unsigned i
= 0; i
< num_layers
; i
++)
142 intel_miptree_check_level_layer(mt
, *level
, start_layer
+ i
);
145 intel_miptree_get_isl_surf(brw
, mt
, &tmp_surfs
[0]);
146 surf
->surf
= &tmp_surfs
[0];
147 surf
->addr
= (struct blorp_address
) {
149 .offset
= mt
->offset
,
150 .read_domains
= is_render_target
? I915_GEM_DOMAIN_RENDER
:
151 I915_GEM_DOMAIN_SAMPLER
,
152 .write_domain
= is_render_target
? I915_GEM_DOMAIN_RENDER
: 0,
155 struct isl_surf
*aux_surf
= &tmp_surfs
[1];
156 intel_miptree_get_aux_isl_surf(brw
, mt
, aux_surf
, &surf
->aux_usage
);
159 bool supports_aux
= surf
->aux_usage
!= ISL_AUX_USAGE_NONE
&&
160 (safe_aux_usage
& (1 << surf
->aux_usage
));
161 intel_miptree_prepare_access(brw
, mt
, *level
, 1, start_layer
, num_layers
,
162 supports_aux
, supports_aux
);
164 surf
->aux_usage
= ISL_AUX_USAGE_NONE
;
166 if (is_render_target
) {
167 intel_miptree_finish_write(brw
, mt
, *level
, start_layer
, num_layers
,
172 if (surf
->aux_usage
!= ISL_AUX_USAGE_NONE
) {
173 /* We only really need a clear color if we also have an auxiliary
174 * surface. Without one, it does nothing.
176 surf
->clear_color
= mt
->fast_clear_color
;
178 surf
->aux_surf
= aux_surf
;
179 surf
->aux_addr
= (struct blorp_address
) {
180 .read_domains
= is_render_target
? I915_GEM_DOMAIN_RENDER
:
181 I915_GEM_DOMAIN_SAMPLER
,
182 .write_domain
= is_render_target
? I915_GEM_DOMAIN_RENDER
: 0,
186 surf
->aux_addr
.buffer
= mt
->mcs_buf
->bo
;
187 surf
->aux_addr
.offset
= mt
->mcs_buf
->offset
;
189 assert(surf
->aux_usage
== ISL_AUX_USAGE_HIZ
);
191 surf
->aux_addr
.buffer
= mt
->hiz_buf
->aux_base
.bo
;
192 surf
->aux_addr
.offset
= mt
->hiz_buf
->aux_base
.offset
;
195 surf
->aux_addr
= (struct blorp_address
) {
198 memset(&surf
->clear_color
, 0, sizeof(surf
->clear_color
));
200 assert((surf
->aux_usage
== ISL_AUX_USAGE_NONE
) ==
201 (surf
->aux_addr
.buffer
== NULL
));
203 /* ISL wants real levels, not offset ones. */
204 *level
-= mt
->first_level
;
207 static enum isl_format
208 brw_blorp_to_isl_format(struct brw_context
*brw
, mesa_format format
,
209 bool is_render_target
)
212 case MESA_FORMAT_NONE
:
213 return ISL_FORMAT_UNSUPPORTED
;
214 case MESA_FORMAT_S_UINT8
:
215 return ISL_FORMAT_R8_UINT
;
216 case MESA_FORMAT_Z24_UNORM_X8_UINT
:
217 case MESA_FORMAT_Z24_UNORM_S8_UINT
:
218 return ISL_FORMAT_R24_UNORM_X8_TYPELESS
;
219 case MESA_FORMAT_Z_FLOAT32
:
220 case MESA_FORMAT_Z32_FLOAT_S8X24_UINT
:
221 return ISL_FORMAT_R32_FLOAT
;
222 case MESA_FORMAT_Z_UNORM16
:
223 return ISL_FORMAT_R16_UNORM
;
225 if (is_render_target
) {
226 assert(brw
->format_supported_as_render_target
[format
]);
227 return brw
->render_target_format
[format
];
229 return brw_isl_format_for_mesa_format(format
);
237 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
238 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
240 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
243 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
245 * which is simply adding 4 then modding by 8 (or anding with 7).
247 * We then may need to apply workarounds for textureGather hardware bugs.
249 static enum isl_channel_select
250 swizzle_to_scs(GLenum swizzle
)
252 return (enum isl_channel_select
)((swizzle
+ 4) & 7);
256 physical_to_logical_layer(struct intel_mipmap_tree
*mt
,
257 unsigned physical_layer
)
259 if (mt
->num_samples
> 1 &&
260 (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
261 mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
)) {
262 assert(physical_layer
% mt
->num_samples
== 0);
263 return physical_layer
/ mt
->num_samples
;
265 return physical_layer
;
270 * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
271 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
272 * the physical layer holding sample 0. So, for example, if
273 * src_mt->num_samples == 4, then logical layer n corresponds to src_layer ==
277 brw_blorp_blit_miptrees(struct brw_context
*brw
,
278 struct intel_mipmap_tree
*src_mt
,
279 unsigned src_level
, unsigned src_layer
,
280 mesa_format src_format
, int src_swizzle
,
281 struct intel_mipmap_tree
*dst_mt
,
282 unsigned dst_level
, unsigned dst_layer
,
283 mesa_format dst_format
,
284 float src_x0
, float src_y0
,
285 float src_x1
, float src_y1
,
286 float dst_x0
, float dst_y0
,
287 float dst_x1
, float dst_y1
,
288 GLenum filter
, bool mirror_x
, bool mirror_y
,
289 bool decode_srgb
, bool encode_srgb
)
291 /* Blorp operates in logical layers */
292 src_layer
= physical_to_logical_layer(src_mt
, src_layer
);
293 dst_layer
= physical_to_logical_layer(dst_mt
, dst_layer
);
295 DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f)"
296 "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
298 src_mt
->num_samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
299 src_level
, src_layer
, src_x0
, src_y0
, src_x1
, src_y1
,
300 dst_mt
->num_samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
301 dst_level
, dst_layer
, dst_x0
, dst_y0
, dst_x1
, dst_y1
,
304 if (!decode_srgb
&& _mesa_get_format_color_encoding(src_format
) == GL_SRGB
)
305 src_format
= _mesa_get_srgb_format_linear(src_format
);
307 if (!encode_srgb
&& _mesa_get_format_color_encoding(dst_format
) == GL_SRGB
)
308 dst_format
= _mesa_get_srgb_format_linear(dst_format
);
310 /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F
311 * texture, the above code configures the source format for L32_FLOAT or
312 * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge,
313 * the SAMPLE message appears to handle multisampled L32_FLOAT and
314 * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work
315 * around the problem by using a source format of R32_FLOAT. This
316 * shouldn't affect rendering correctness, since the destination format is
317 * R32_FLOAT, so only the contents of the red channel matters.
320 src_mt
->num_samples
> 1 && dst_mt
->num_samples
<= 1 &&
321 src_mt
->format
== dst_mt
->format
&&
322 (dst_format
== MESA_FORMAT_L_FLOAT32
||
323 dst_format
== MESA_FORMAT_I_FLOAT32
)) {
324 src_format
= dst_format
= MESA_FORMAT_R_FLOAT32
;
327 uint32_t src_usage_flags
= (1 << ISL_AUX_USAGE_MCS
);
328 if (src_format
== src_mt
->format
)
329 src_usage_flags
|= (1 << ISL_AUX_USAGE_CCS_E
);
331 uint32_t dst_usage_flags
= (1 << ISL_AUX_USAGE_MCS
);
332 if (dst_format
== dst_mt
->format
) {
333 dst_usage_flags
|= (1 << ISL_AUX_USAGE_CCS_E
) |
334 (1 << ISL_AUX_USAGE_CCS_D
);
337 struct isl_surf tmp_surfs
[4];
338 struct blorp_surf src_surf
, dst_surf
;
339 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, false, true, src_usage_flags
,
340 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
341 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, true, true, dst_usage_flags
,
342 &dst_level
, dst_layer
, 1, &tmp_surfs
[2]);
344 struct isl_swizzle src_isl_swizzle
= {
345 .r
= swizzle_to_scs(GET_SWZ(src_swizzle
, 0)),
346 .g
= swizzle_to_scs(GET_SWZ(src_swizzle
, 1)),
347 .b
= swizzle_to_scs(GET_SWZ(src_swizzle
, 2)),
348 .a
= swizzle_to_scs(GET_SWZ(src_swizzle
, 3)),
351 struct blorp_batch batch
;
352 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
353 blorp_blit(&batch
, &src_surf
, src_level
, src_layer
,
354 brw_blorp_to_isl_format(brw
, src_format
, false), src_isl_swizzle
,
355 &dst_surf
, dst_level
, dst_layer
,
356 brw_blorp_to_isl_format(brw
, dst_format
, true),
357 ISL_SWIZZLE_IDENTITY
,
358 src_x0
, src_y0
, src_x1
, src_y1
,
359 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
360 filter
, mirror_x
, mirror_y
);
361 blorp_batch_finish(&batch
);
365 brw_blorp_copy_miptrees(struct brw_context
*brw
,
366 struct intel_mipmap_tree
*src_mt
,
367 unsigned src_level
, unsigned src_layer
,
368 struct intel_mipmap_tree
*dst_mt
,
369 unsigned dst_level
, unsigned dst_layer
,
370 unsigned src_x
, unsigned src_y
,
371 unsigned dst_x
, unsigned dst_y
,
372 unsigned src_width
, unsigned src_height
)
374 DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d"
375 "to %dx %s mt %p %d %d (%d,%d)\n",
377 src_mt
->num_samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
378 src_level
, src_layer
, src_x
, src_y
, src_width
, src_height
,
379 dst_mt
->num_samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
380 dst_level
, dst_layer
, dst_x
, dst_y
);
382 struct isl_surf tmp_surfs
[4];
383 struct blorp_surf src_surf
, dst_surf
;
384 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, false, true,
385 (1 << ISL_AUX_USAGE_MCS
) |
386 (1 << ISL_AUX_USAGE_CCS_E
),
387 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
388 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, true, true,
389 (1 << ISL_AUX_USAGE_MCS
) |
390 (1 << ISL_AUX_USAGE_CCS_E
),
391 &dst_level
, dst_layer
, 1, &tmp_surfs
[2]);
393 struct blorp_batch batch
;
394 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
395 blorp_copy(&batch
, &src_surf
, src_level
, src_layer
,
396 &dst_surf
, dst_level
, dst_layer
,
397 src_x
, src_y
, dst_x
, dst_y
, src_width
, src_height
);
398 blorp_batch_finish(&batch
);
401 static struct intel_mipmap_tree
*
402 find_miptree(GLbitfield buffer_bit
, struct intel_renderbuffer
*irb
)
404 struct intel_mipmap_tree
*mt
= irb
->mt
;
405 if (buffer_bit
== GL_STENCIL_BUFFER_BIT
&& mt
->stencil_mt
)
411 blorp_get_texture_swizzle(const struct intel_renderbuffer
*irb
)
413 return irb
->Base
.Base
._BaseFormat
== GL_RGB
?
414 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
) :
419 do_blorp_blit(struct brw_context
*brw
, GLbitfield buffer_bit
,
420 struct intel_renderbuffer
*src_irb
, mesa_format src_format
,
421 struct intel_renderbuffer
*dst_irb
, mesa_format dst_format
,
422 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
423 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
424 GLenum filter
, bool mirror_x
, bool mirror_y
)
426 const struct gl_context
*ctx
= &brw
->ctx
;
428 /* Find source/dst miptrees */
429 struct intel_mipmap_tree
*src_mt
= find_miptree(buffer_bit
, src_irb
);
430 struct intel_mipmap_tree
*dst_mt
= find_miptree(buffer_bit
, dst_irb
);
432 const bool do_srgb
= ctx
->Color
.sRGBEnabled
;
435 brw_blorp_blit_miptrees(brw
,
436 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
437 src_format
, blorp_get_texture_swizzle(src_irb
),
438 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
,
440 srcX0
, srcY0
, srcX1
, srcY1
,
441 dstX0
, dstY0
, dstX1
, dstY1
,
442 filter
, mirror_x
, mirror_y
,
445 dst_irb
->need_downsample
= true;
449 try_blorp_blit(struct brw_context
*brw
,
450 const struct gl_framebuffer
*read_fb
,
451 const struct gl_framebuffer
*draw_fb
,
452 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
453 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
454 GLenum filter
, GLbitfield buffer_bit
)
456 struct gl_context
*ctx
= &brw
->ctx
;
458 /* Sync up the state of window system buffers. We need to do this before
459 * we go looking for the buffers.
461 intel_prepare_render(brw
);
463 bool mirror_x
, mirror_y
;
464 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
465 &srcX0
, &srcY0
, &srcX1
, &srcY1
,
466 &dstX0
, &dstY0
, &dstX1
, &dstY1
,
467 &mirror_x
, &mirror_y
))
471 struct intel_renderbuffer
*src_irb
;
472 struct intel_renderbuffer
*dst_irb
;
473 struct intel_mipmap_tree
*src_mt
;
474 struct intel_mipmap_tree
*dst_mt
;
475 switch (buffer_bit
) {
476 case GL_COLOR_BUFFER_BIT
:
477 src_irb
= intel_renderbuffer(read_fb
->_ColorReadBuffer
);
478 for (unsigned i
= 0; i
< draw_fb
->_NumColorDrawBuffers
; ++i
) {
479 dst_irb
= intel_renderbuffer(draw_fb
->_ColorDrawBuffers
[i
]);
481 do_blorp_blit(brw
, buffer_bit
,
482 src_irb
, src_irb
->Base
.Base
.Format
,
483 dst_irb
, dst_irb
->Base
.Base
.Format
,
484 srcX0
, srcY0
, srcX1
, srcY1
,
485 dstX0
, dstY0
, dstX1
, dstY1
,
486 filter
, mirror_x
, mirror_y
);
489 case GL_DEPTH_BUFFER_BIT
:
491 intel_renderbuffer(read_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
493 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
494 src_mt
= find_miptree(buffer_bit
, src_irb
);
495 dst_mt
= find_miptree(buffer_bit
, dst_irb
);
497 /* We can't handle format conversions between Z24 and other formats
498 * since we have to lie about the surface format. See the comments in
499 * brw_blorp_surface_info::set().
501 if ((src_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
) !=
502 (dst_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
))
505 /* We also can't handle any combined depth-stencil formats because we
506 * have to reinterpret as a color format.
508 if (_mesa_get_format_base_format(src_mt
->format
) == GL_DEPTH_STENCIL
||
509 _mesa_get_format_base_format(dst_mt
->format
) == GL_DEPTH_STENCIL
)
512 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
513 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
514 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
515 filter
, mirror_x
, mirror_y
);
517 case GL_STENCIL_BUFFER_BIT
:
518 /* Blorp doesn't support combined depth stencil which is all we have
525 intel_renderbuffer(read_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
527 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
528 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
529 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
530 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
531 filter
, mirror_x
, mirror_y
);
534 unreachable("not reached");
541 brw_blorp_copytexsubimage(struct brw_context
*brw
,
542 struct gl_renderbuffer
*src_rb
,
543 struct gl_texture_image
*dst_image
,
545 int srcX0
, int srcY0
,
546 int dstX0
, int dstY0
,
547 int width
, int height
)
549 struct gl_context
*ctx
= &brw
->ctx
;
550 struct intel_renderbuffer
*src_irb
= intel_renderbuffer(src_rb
);
551 struct intel_texture_image
*intel_image
= intel_texture_image(dst_image
);
553 /* No pixel transfer operations (zoom, bias, mapping), just a blit */
554 if (brw
->ctx
._ImageTransferState
)
557 /* Sync up the state of window system buffers. We need to do this before
558 * we go looking at the src renderbuffer's miptree.
560 intel_prepare_render(brw
);
562 struct intel_mipmap_tree
*src_mt
= src_irb
->mt
;
563 struct intel_mipmap_tree
*dst_mt
= intel_image
->mt
;
565 /* There is support for only up to eight samples. */
566 if (src_mt
->num_samples
> 8 || dst_mt
->num_samples
> 8)
569 if (_mesa_get_format_base_format(src_rb
->Format
) !=
570 _mesa_get_format_base_format(dst_image
->TexFormat
)) {
574 /* We can't handle format conversions between Z24 and other formats since
575 * we have to lie about the surface format. See the comments in
576 * brw_blorp_surface_info::set().
578 if ((src_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
) !=
579 (dst_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
)) {
583 /* We also can't handle any combined depth-stencil formats because we
584 * have to reinterpret as a color format.
586 if (_mesa_get_format_base_format(src_mt
->format
) == GL_DEPTH_STENCIL
||
587 _mesa_get_format_base_format(dst_mt
->format
) == GL_DEPTH_STENCIL
)
590 if (!brw
->format_supported_as_render_target
[dst_image
->TexFormat
])
593 /* Source clipping shouldn't be necessary, since copytexsubimage (in
594 * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
597 * Destination clipping shouldn't be necessary since the restrictions on
598 * glCopyTexSubImage prevent the user from specifying a destination rectangle
599 * that falls outside the bounds of the destination texture.
600 * See error_check_subtexture_dimensions().
603 int srcY1
= srcY0
+ height
;
604 int srcX1
= srcX0
+ width
;
605 int dstX1
= dstX0
+ width
;
606 int dstY1
= dstY0
+ height
;
608 /* Account for the fact that in the system framebuffer, the origin is at
611 bool mirror_y
= false;
612 if (_mesa_is_winsys_fbo(ctx
->ReadBuffer
)) {
613 GLint tmp
= src_rb
->Height
- srcY0
;
614 srcY0
= src_rb
->Height
- srcY1
;
619 /* Account for face selection and texture view MinLayer */
620 int dst_slice
= slice
+ dst_image
->TexObject
->MinLayer
+ dst_image
->Face
;
621 int dst_level
= dst_image
->Level
+ dst_image
->TexObject
->MinLevel
;
623 brw_blorp_blit_miptrees(brw
,
624 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
625 src_rb
->Format
, blorp_get_texture_swizzle(src_irb
),
626 dst_mt
, dst_level
, dst_slice
,
627 dst_image
->TexFormat
,
628 srcX0
, srcY0
, srcX1
, srcY1
,
629 dstX0
, dstY0
, dstX1
, dstY1
,
630 GL_NEAREST
, false, mirror_y
,
633 /* If we're copying to a packed depth stencil texture and the source
634 * framebuffer has separate stencil, we need to also copy the stencil data
637 src_rb
= ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
638 if (_mesa_get_format_bits(dst_image
->TexFormat
, GL_STENCIL_BITS
) > 0 &&
640 src_irb
= intel_renderbuffer(src_rb
);
641 src_mt
= src_irb
->mt
;
643 if (src_mt
->stencil_mt
)
644 src_mt
= src_mt
->stencil_mt
;
645 if (dst_mt
->stencil_mt
)
646 dst_mt
= dst_mt
->stencil_mt
;
648 if (src_mt
!= dst_mt
) {
649 brw_blorp_blit_miptrees(brw
,
650 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
652 blorp_get_texture_swizzle(src_irb
),
653 dst_mt
, dst_level
, dst_slice
,
655 srcX0
, srcY0
, srcX1
, srcY1
,
656 dstX0
, dstY0
, dstX1
, dstY1
,
657 GL_NEAREST
, false, mirror_y
,
667 brw_blorp_framebuffer(struct brw_context
*brw
,
668 struct gl_framebuffer
*readFb
,
669 struct gl_framebuffer
*drawFb
,
670 GLint srcX0
, GLint srcY0
, GLint srcX1
, GLint srcY1
,
671 GLint dstX0
, GLint dstY0
, GLint dstX1
, GLint dstY1
,
672 GLbitfield mask
, GLenum filter
)
674 static GLbitfield buffer_bits
[] = {
677 GL_STENCIL_BUFFER_BIT
,
680 for (unsigned int i
= 0; i
< ARRAY_SIZE(buffer_bits
); ++i
) {
681 if ((mask
& buffer_bits
[i
]) &&
682 try_blorp_blit(brw
, readFb
, drawFb
,
683 srcX0
, srcY0
, srcX1
, srcY1
,
684 dstX0
, dstY0
, dstX1
, dstY1
,
685 filter
, buffer_bits
[i
])) {
686 mask
&= ~buffer_bits
[i
];
694 set_write_disables(const struct intel_renderbuffer
*irb
,
695 const GLubyte
*color_mask
, bool *color_write_disable
)
697 /* Format information in the renderbuffer represents the requirements
698 * given by the client. There are cases where the backing miptree uses,
699 * for example, RGBA to represent RGBX. Since the client is only expecting
700 * RGB we can treat alpha as not used and write whatever we like into it.
702 const GLenum base_format
= irb
->Base
.Base
._BaseFormat
;
703 const int components
= _mesa_base_format_component_count(base_format
);
704 bool disables
= false;
706 assert(components
> 0);
708 for (int i
= 0; i
< components
; i
++) {
709 color_write_disable
[i
] = !color_mask
[i
];
710 disables
= disables
|| !color_mask
[i
];
717 irb_logical_mt_layer(struct intel_renderbuffer
*irb
)
719 return physical_to_logical_layer(irb
->mt
, irb
->mt_layer
);
723 do_single_blorp_clear(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
724 struct gl_renderbuffer
*rb
, unsigned buf
,
725 bool partial_clear
, bool encode_srgb
)
727 struct gl_context
*ctx
= &brw
->ctx
;
728 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
729 mesa_format format
= irb
->mt
->format
;
730 uint32_t x0
, x1
, y0
, y1
;
732 if (!encode_srgb
&& _mesa_get_format_color_encoding(format
) == GL_SRGB
)
733 format
= _mesa_get_srgb_format_linear(format
);
741 y0
= rb
->Height
- fb
->_Ymax
;
742 y1
= rb
->Height
- fb
->_Ymin
;
745 /* If the clear region is empty, just return. */
746 if (x0
== x1
|| y0
== y1
)
749 bool can_fast_clear
= !partial_clear
;
751 bool color_write_disable
[4] = { false, false, false, false };
752 if (set_write_disables(irb
, ctx
->Color
.ColorMask
[buf
], color_write_disable
))
753 can_fast_clear
= false;
755 if (irb
->mt
->aux_disable
& INTEL_AUX_DISABLE_CCS
||
756 !brw_is_color_fast_clear_compatible(brw
, irb
->mt
, &ctx
->Color
.ClearColor
))
757 can_fast_clear
= false;
759 const unsigned logical_layer
= irb_logical_mt_layer(irb
);
761 /* Surface state can only record one fast clear color value. Therefore
762 * unless different levels/layers agree on the color it can be used to
763 * represent only single level/layer. Here it will be reserved for the
764 * first slice (level 0, layer 0).
766 if (irb
->layer_count
> 1 || irb
->mt_level
|| irb
->mt_layer
)
767 can_fast_clear
= false;
769 unsigned level
= irb
->mt_level
;
770 const unsigned num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
772 if (can_fast_clear
) {
773 /* If the MCS buffer hasn't been allocated yet, we need to allocate
776 if (!irb
->mt
->mcs_buf
) {
777 assert(!intel_miptree_is_lossless_compressed(brw
, irb
->mt
));
778 if (!intel_miptree_alloc_non_msrt_mcs(brw
, irb
->mt
, false)) {
779 /* MCS allocation failed--probably this will only happen in
780 * out-of-memory conditions. But in any case, try to recover
781 * by falling back to a non-blorp clear technique.
787 const enum intel_fast_clear_state fast_clear_state
=
788 intel_miptree_get_fast_clear_state(irb
->mt
, irb
->mt_level
,
790 union isl_color_value clear_color
=
791 brw_meta_convert_fast_clear_color(brw
, irb
->mt
,
792 &ctx
->Color
.ClearColor
);
794 /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
795 * is redundant and can be skipped.
797 if (fast_clear_state
== INTEL_FAST_CLEAR_STATE_CLEAR
&&
798 memcmp(&irb
->mt
->fast_clear_color
,
799 &clear_color
, sizeof(clear_color
)) == 0)
802 irb
->mt
->fast_clear_color
= clear_color
;
804 DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__
,
805 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
807 /* We can't setup the blorp_surf until we've allocated the MCS above */
808 struct isl_surf isl_tmp
[2];
809 struct blorp_surf surf
;
810 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, true, false, 0,
811 &level
, logical_layer
, num_layers
, isl_tmp
);
813 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
815 * "Any transition from any value in {Clear, Render, Resolve} to a
816 * different value in {Clear, Render, Resolve} requires end of pipe
819 * In other words, fast clear ops are not properly synchronized with
820 * other drawing. We need to use a PIPE_CONTROL to ensure that the
821 * contents of the previous draw hit the render target before we resolve
822 * and again afterwards to ensure that the resolve is complete before we
823 * do any more regular drawing.
825 brw_emit_pipe_control_flush(brw
,
826 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
827 PIPE_CONTROL_CS_STALL
);
829 struct blorp_batch batch
;
830 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
831 blorp_fast_clear(&batch
, &surf
,
832 brw
->render_target_format
[format
],
833 level
, logical_layer
, num_layers
,
835 blorp_batch_finish(&batch
);
837 brw_emit_pipe_control_flush(brw
,
838 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
839 PIPE_CONTROL_CS_STALL
);
841 /* Now that the fast clear has occurred, put the buffer in
842 * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
845 intel_miptree_set_fast_clear_state(brw
, irb
->mt
, irb
->mt_level
,
846 logical_layer
, num_layers
,
847 INTEL_FAST_CLEAR_STATE_CLEAR
);
849 DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__
,
850 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
852 struct isl_surf isl_tmp
[2];
853 struct blorp_surf surf
;
854 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, true, true,
855 (1 << ISL_AUX_USAGE_MCS
) |
856 (1 << ISL_AUX_USAGE_CCS_E
) |
857 (1 << ISL_AUX_USAGE_CCS_D
),
858 &level
, logical_layer
, num_layers
, isl_tmp
);
860 union isl_color_value clear_color
;
861 memcpy(clear_color
.f32
, ctx
->Color
.ClearColor
.f
, sizeof(float) * 4);
863 struct blorp_batch batch
;
864 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
865 blorp_clear(&batch
, &surf
,
866 brw
->render_target_format
[format
],
867 ISL_SWIZZLE_IDENTITY
,
868 level
, irb_logical_mt_layer(irb
), num_layers
,
870 clear_color
, color_write_disable
);
871 blorp_batch_finish(&batch
);
878 brw_blorp_clear_color(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
879 GLbitfield mask
, bool partial_clear
, bool encode_srgb
)
881 for (unsigned buf
= 0; buf
< fb
->_NumColorDrawBuffers
; buf
++) {
882 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[buf
];
883 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
885 /* Only clear the buffers present in the provided mask */
886 if (((1 << fb
->_ColorDrawBufferIndexes
[buf
]) & mask
) == 0)
889 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
890 * the framebuffer can be complete with some attachments missing. In
891 * this case the _ColorDrawBuffers pointer will be NULL.
896 if (!do_single_blorp_clear(brw
, fb
, rb
, buf
, partial_clear
,
901 irb
->need_downsample
= true;
908 brw_blorp_resolve_color(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
909 unsigned level
, unsigned layer
,
910 enum blorp_fast_clear_op resolve_op
)
912 DBG("%s to mt %p level %u layer %u\n", __FUNCTION__
, mt
, level
, layer
);
914 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
916 struct isl_surf isl_tmp
[2];
917 struct blorp_surf surf
;
918 blorp_surf_for_miptree(brw
, &surf
, mt
, true, false, 0,
919 &level
, layer
, 1 /* num_layers */,
922 /* Ivybrigde PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
924 * "Any transition from any value in {Clear, Render, Resolve} to a
925 * different value in {Clear, Render, Resolve} requires end of pipe
928 * In other words, fast clear ops are not properly synchronized with
929 * other drawing. We need to use a PIPE_CONTROL to ensure that the
930 * contents of the previous draw hit the render target before we resolve
931 * and again afterwards to ensure that the resolve is complete before we
932 * do any more regular drawing.
934 brw_emit_pipe_control_flush(brw
,
935 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
936 PIPE_CONTROL_CS_STALL
);
939 struct blorp_batch batch
;
940 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
941 blorp_ccs_resolve(&batch
, &surf
, level
, layer
,
942 brw_blorp_to_isl_format(brw
, format
, true),
944 blorp_batch_finish(&batch
);
946 /* See comment above */
947 brw_emit_pipe_control_flush(brw
,
948 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
949 PIPE_CONTROL_CS_STALL
);
953 * Perform a HiZ or depth resolve operation.
955 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
956 * PRM, Volume 1, Part 2:
957 * - 7.5.3.1 Depth Buffer Clear
958 * - 7.5.3.2 Depth Buffer Resolve
959 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
962 intel_hiz_exec(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
963 unsigned int level
, unsigned int start_layer
,
964 unsigned int num_layers
, enum blorp_hiz_op op
)
966 assert(intel_miptree_level_has_hiz(mt
, level
));
967 assert(op
!= BLORP_HIZ_OP_NONE
);
968 const char *opname
= NULL
;
971 case BLORP_HIZ_OP_DEPTH_RESOLVE
:
972 opname
= "depth resolve";
974 case BLORP_HIZ_OP_HIZ_RESOLVE
:
975 opname
= "hiz ambiguate";
977 case BLORP_HIZ_OP_DEPTH_CLEAR
:
978 opname
= "depth clear";
980 case BLORP_HIZ_OP_NONE
:
985 DBG("%s %s to mt %p level %d layers %d-%d\n",
986 __func__
, opname
, mt
, level
, start_layer
, start_layer
+ num_layers
- 1);
988 /* The following stalls and flushes are only documented to be required for
989 * HiZ clear operations. However, they also seem to be required for the
990 * HiZ resolve operation which is basically the same as a fast clear only a
991 * different value is written into the HiZ surface.
993 if (op
== BLORP_HIZ_OP_DEPTH_CLEAR
|| op
== BLORP_HIZ_OP_HIZ_RESOLVE
) {
995 /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
997 * "If other rendering operations have preceded this clear, a
998 * PIPE_CONTROL with write cache flush enabled and Z-inhibit
999 * disabled must be issued before the rectangle primitive used for
1000 * the depth buffer clear operation.
1002 brw_emit_pipe_control_flush(brw
,
1003 PIPE_CONTROL_RENDER_TARGET_FLUSH
|
1004 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1005 PIPE_CONTROL_CS_STALL
);
1006 } else if (brw
->gen
>= 7) {
1008 * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
1010 * If other rendering operations have preceded this clear, a
1011 * PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
1012 * enabled must be issued before the rectangle primitive used for
1013 * the depth buffer clear operation.
1015 * Same applies for Gen8 and Gen9.
1017 * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
1018 * PIPE_CONTROL, Depth Cache Flush Enable:
1020 * This bit must not be set when Depth Stall Enable bit is set in
1023 * This is confirmed to hold for real, HSW gets immediate gpu hangs.
1025 * Therefore issue two pipe control flushes, one for cache flush and
1026 * another for depth stall.
1028 brw_emit_pipe_control_flush(brw
,
1029 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1030 PIPE_CONTROL_CS_STALL
);
1032 brw_emit_pipe_control_flush(brw
, PIPE_CONTROL_DEPTH_STALL
);
1037 struct isl_surf isl_tmp
[2];
1038 struct blorp_surf surf
;
1039 blorp_surf_for_miptree(brw
, &surf
, mt
, true, false, 0,
1040 &level
, start_layer
, num_layers
, isl_tmp
);
1042 struct blorp_batch batch
;
1043 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
1044 blorp_hiz_op(&batch
, &surf
, level
, start_layer
, num_layers
, op
);
1045 blorp_batch_finish(&batch
);
1047 /* The following stalls and flushes are only documented to be required for
1048 * HiZ clear operations. However, they also seem to be required for the
1049 * HiZ resolve operation which is basically the same as a fast clear only a
1050 * different value is written into the HiZ surface.
1052 if (op
== BLORP_HIZ_OP_DEPTH_CLEAR
|| op
== BLORP_HIZ_OP_HIZ_RESOLVE
) {
1053 if (brw
->gen
== 6) {
1054 /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
1056 * "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
1057 * followed by a PIPE_CONTROL command with DEPTH_STALL bit set
1058 * and Then followed by Depth FLUSH'
1060 brw_emit_pipe_control_flush(brw
,
1061 PIPE_CONTROL_DEPTH_STALL
);
1063 brw_emit_pipe_control_flush(brw
,
1064 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1065 PIPE_CONTROL_CS_STALL
);
1066 } else if (brw
->gen
>= 8) {
1068 * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
1070 * "Depth buffer clear pass using any of the methods (WM_STATE,
1071 * 3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
1072 * PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
1073 * "set" before starting to render. DepthStall and DepthFlush are
1074 * not needed between consecutive depth clear passes nor is it
1075 * required if the depth clear pass was done with
1076 * 'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
1078 * TODO: Such as the spec says, this could be conditional.
1080 brw_emit_pipe_control_flush(brw
,
1081 PIPE_CONTROL_DEPTH_CACHE_FLUSH
|
1082 PIPE_CONTROL_DEPTH_STALL
);