2 * Copyright © 2012 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 #include "main/context.h"
25 #include "main/teximage.h"
26 #include "main/blend.h"
27 #include "main/fbobject.h"
28 #include "main/renderbuffer.h"
29 #include "main/glformats.h"
31 #include "brw_blorp.h"
32 #include "brw_context.h"
33 #include "brw_defines.h"
34 #include "brw_meta_util.h"
35 #include "brw_state.h"
36 #include "intel_fbo.h"
37 #include "intel_debug.h"
39 #define FILE_DEBUG_FLAG DEBUG_BLORP
42 brw_blorp_lookup_shader(struct blorp_context
*blorp
,
43 const void *key
, uint32_t key_size
,
44 uint32_t *kernel_out
, void *prog_data_out
)
46 struct brw_context
*brw
= blorp
->driver_ctx
;
47 return brw_search_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
,
48 key
, key_size
, kernel_out
, prog_data_out
);
52 brw_blorp_upload_shader(struct blorp_context
*blorp
,
53 const void *key
, uint32_t key_size
,
54 const void *kernel
, uint32_t kernel_size
,
55 const struct brw_stage_prog_data
*prog_data
,
56 uint32_t prog_data_size
,
57 uint32_t *kernel_out
, void *prog_data_out
)
59 struct brw_context
*brw
= blorp
->driver_ctx
;
60 brw_upload_cache(&brw
->cache
, BRW_CACHE_BLORP_PROG
, key
, key_size
,
61 kernel
, kernel_size
, prog_data
, prog_data_size
,
62 kernel_out
, prog_data_out
);
66 brw_blorp_init(struct brw_context
*brw
)
68 blorp_init(&brw
->blorp
, brw
, &brw
->isl_dev
);
70 brw
->blorp
.compiler
= brw
->screen
->compiler
;
74 brw
->blorp
.mocs
.tex
= 0;
75 brw
->blorp
.mocs
.rb
= 0;
76 brw
->blorp
.mocs
.vb
= 0;
77 brw
->blorp
.exec
= gen6_blorp_exec
;
80 brw
->blorp
.mocs
.tex
= GEN7_MOCS_L3
;
81 brw
->blorp
.mocs
.rb
= GEN7_MOCS_L3
;
82 brw
->blorp
.mocs
.vb
= GEN7_MOCS_L3
;
83 if (brw
->is_haswell
) {
84 brw
->blorp
.exec
= gen75_blorp_exec
;
86 brw
->blorp
.exec
= gen7_blorp_exec
;
90 brw
->blorp
.mocs
.tex
= BDW_MOCS_WB
;
91 brw
->blorp
.mocs
.rb
= BDW_MOCS_PTE
;
92 brw
->blorp
.mocs
.vb
= BDW_MOCS_WB
;
93 brw
->blorp
.exec
= gen8_blorp_exec
;
96 brw
->blorp
.mocs
.tex
= SKL_MOCS_WB
;
97 brw
->blorp
.mocs
.rb
= SKL_MOCS_PTE
;
98 brw
->blorp
.mocs
.vb
= SKL_MOCS_WB
;
99 brw
->blorp
.exec
= gen9_blorp_exec
;
102 unreachable("Invalid gen");
105 brw
->blorp
.lookup_shader
= brw_blorp_lookup_shader
;
106 brw
->blorp
.upload_shader
= brw_blorp_upload_shader
;
110 apply_gen6_stencil_hiz_offset(struct isl_surf
*surf
,
111 struct intel_mipmap_tree
*mt
,
115 assert(mt
->array_layout
== ALL_SLICES_AT_EACH_LOD
);
117 if (mt
->format
== MESA_FORMAT_S_UINT8
) {
118 /* Note: we can't compute the stencil offset using
119 * intel_miptree_get_aligned_offset(), because the miptree
120 * claims that the region is untiled even though it's W tiled.
122 *offset
= mt
->level
[lod
].level_y
* mt
->pitch
+
123 mt
->level
[lod
].level_x
* 64;
125 *offset
= intel_miptree_get_aligned_offset(mt
,
126 mt
->level
[lod
].level_x
,
127 mt
->level
[lod
].level_y
);
130 surf
->logical_level0_px
.width
= minify(surf
->logical_level0_px
.width
, lod
);
131 surf
->logical_level0_px
.height
= minify(surf
->logical_level0_px
.height
, lod
);
132 surf
->phys_level0_sa
.width
= minify(surf
->phys_level0_sa
.width
, lod
);
133 surf
->phys_level0_sa
.height
= minify(surf
->phys_level0_sa
.height
, lod
);
135 surf
->array_pitch_el_rows
=
136 ALIGN(surf
->phys_level0_sa
.height
, surf
->image_alignment_el
.height
);
140 blorp_surf_for_miptree(struct brw_context
*brw
,
141 struct blorp_surf
*surf
,
142 struct intel_mipmap_tree
*mt
,
143 bool is_render_target
,
144 uint32_t safe_aux_usage
,
146 unsigned start_layer
, unsigned num_layers
,
147 struct isl_surf tmp_surfs
[2])
149 if (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
150 mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
) {
151 const unsigned num_samples
= MAX2(1, mt
->num_samples
);
152 for (unsigned i
= 0; i
< num_layers
; i
++) {
153 for (unsigned s
= 0; s
< num_samples
; s
++) {
154 const unsigned phys_layer
= (start_layer
+ i
) * num_samples
+ s
;
155 intel_miptree_check_level_layer(mt
, *level
, phys_layer
);
159 for (unsigned i
= 0; i
< num_layers
; i
++)
160 intel_miptree_check_level_layer(mt
, *level
, start_layer
+ i
);
163 intel_miptree_get_isl_surf(brw
, mt
, &tmp_surfs
[0]);
164 surf
->surf
= &tmp_surfs
[0];
165 surf
->addr
= (struct blorp_address
) {
167 .offset
= mt
->offset
,
168 .read_domains
= is_render_target
? I915_GEM_DOMAIN_RENDER
:
169 I915_GEM_DOMAIN_SAMPLER
,
170 .write_domain
= is_render_target
? I915_GEM_DOMAIN_RENDER
: 0,
173 if (brw
->gen
== 6 && mt
->format
== MESA_FORMAT_S_UINT8
&&
174 mt
->array_layout
== ALL_SLICES_AT_EACH_LOD
) {
175 /* Sandy bridge stencil and HiZ use this ALL_SLICES_AT_EACH_LOD hack in
176 * order to allow for layered rendering. The hack makes each LOD of the
177 * stencil or HiZ buffer a single tightly packed array surface at some
178 * offset into the surface. Since ISL doesn't know how to deal with the
179 * crazy ALL_SLICES_AT_EACH_LOD layout and since we have to do a manual
180 * offset of it anyway, we might as well do the offset here and keep the
181 * hacks inside the i965 driver.
183 * See also gen6_depth_stencil_state.c
186 apply_gen6_stencil_hiz_offset(&tmp_surfs
[0], mt
, *level
, &offset
);
187 surf
->addr
.offset
+= offset
;
191 struct isl_surf
*aux_surf
= &tmp_surfs
[1];
192 intel_miptree_get_aux_isl_surf(brw
, mt
, aux_surf
, &surf
->aux_usage
);
194 if (surf
->aux_usage
!= ISL_AUX_USAGE_NONE
) {
195 if (surf
->aux_usage
== ISL_AUX_USAGE_HIZ
) {
196 /* If we're not going to use it as a depth buffer, resolve HiZ */
197 if (!(safe_aux_usage
& (1 << ISL_AUX_USAGE_HIZ
))) {
198 for (unsigned i
= 0; i
< num_layers
; i
++) {
199 intel_miptree_slice_resolve_depth(brw
, mt
, *level
,
202 /* If we're rendering to it then we'll need a HiZ resolve once
203 * we're done before we can use it with HiZ again.
205 if (is_render_target
)
206 intel_miptree_slice_set_needs_hiz_resolve(mt
, *level
,
209 surf
->aux_usage
= ISL_AUX_USAGE_NONE
;
211 } else if (!(safe_aux_usage
& (1 << surf
->aux_usage
))) {
213 if (safe_aux_usage
& (1 << ISL_AUX_USAGE_CCS_E
))
214 flags
|= INTEL_MIPTREE_IGNORE_CCS_E
;
216 intel_miptree_resolve_color(brw
, mt
, flags
);
218 assert(mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_RESOLVED
);
219 surf
->aux_usage
= ISL_AUX_USAGE_NONE
;
223 if (is_render_target
) {
224 intel_miptree_used_for_rendering(mt
);
226 if (surf
->aux_usage
== ISL_AUX_USAGE_CCS_E
)
227 mt
->fast_clear_state
= INTEL_FAST_CLEAR_STATE_UNRESOLVED
;
230 if (surf
->aux_usage
!= ISL_AUX_USAGE_NONE
) {
231 /* We only really need a clear color if we also have an auxiliary
232 * surface. Without one, it does nothing.
234 surf
->clear_color
= intel_miptree_get_isl_clear_color(brw
, mt
);
236 surf
->aux_surf
= aux_surf
;
237 surf
->aux_addr
= (struct blorp_address
) {
238 .read_domains
= is_render_target
? I915_GEM_DOMAIN_RENDER
:
239 I915_GEM_DOMAIN_SAMPLER
,
240 .write_domain
= is_render_target
? I915_GEM_DOMAIN_RENDER
: 0,
244 surf
->aux_addr
.buffer
= mt
->mcs_buf
->bo
;
245 surf
->aux_addr
.offset
= mt
->mcs_buf
->offset
;
247 assert(surf
->aux_usage
== ISL_AUX_USAGE_HIZ
);
248 struct intel_mipmap_tree
*hiz_mt
= mt
->hiz_buf
->mt
;
250 surf
->aux_addr
.buffer
= hiz_mt
->bo
;
252 hiz_mt
->array_layout
== ALL_SLICES_AT_EACH_LOD
) {
253 /* gen6 requires the HiZ buffer to be manually offset to the
254 * right location. We could fixup the surf but it doesn't
255 * matter since most of those fields don't matter.
257 apply_gen6_stencil_hiz_offset(aux_surf
, hiz_mt
, *level
,
258 &surf
->aux_addr
.offset
);
260 surf
->aux_addr
.offset
= 0;
262 assert(hiz_mt
->pitch
== aux_surf
->row_pitch
);
264 surf
->aux_addr
.buffer
= mt
->hiz_buf
->aux_base
.bo
;
265 surf
->aux_addr
.offset
= mt
->hiz_buf
->aux_base
.offset
;
269 surf
->aux_addr
= (struct blorp_address
) {
272 memset(&surf
->clear_color
, 0, sizeof(surf
->clear_color
));
274 assert((surf
->aux_usage
== ISL_AUX_USAGE_NONE
) ==
275 (surf
->aux_addr
.buffer
== NULL
));
278 static enum isl_format
279 brw_blorp_to_isl_format(struct brw_context
*brw
, mesa_format format
,
280 bool is_render_target
)
283 case MESA_FORMAT_NONE
:
284 return ISL_FORMAT_UNSUPPORTED
;
285 case MESA_FORMAT_S_UINT8
:
286 return ISL_FORMAT_R8_UINT
;
287 case MESA_FORMAT_Z24_UNORM_X8_UINT
:
288 return ISL_FORMAT_R24_UNORM_X8_TYPELESS
;
289 case MESA_FORMAT_Z_FLOAT32
:
290 return ISL_FORMAT_R32_FLOAT
;
291 case MESA_FORMAT_Z_UNORM16
:
292 return ISL_FORMAT_R16_UNORM
;
294 if (is_render_target
) {
295 assert(brw
->format_supported_as_render_target
[format
]);
296 return brw
->render_target_format
[format
];
298 return brw_format_for_mesa_format(format
);
306 * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
307 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
309 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
312 * SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, SCS_ZERO, SCS_ONE
314 * which is simply adding 4 then modding by 8 (or anding with 7).
316 * We then may need to apply workarounds for textureGather hardware bugs.
318 static enum isl_channel_select
319 swizzle_to_scs(GLenum swizzle
)
321 return (enum isl_channel_select
)((swizzle
+ 4) & 7);
325 physical_to_logical_layer(struct intel_mipmap_tree
*mt
,
326 unsigned physical_layer
)
328 if (mt
->num_samples
> 1 &&
329 (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_UMS
||
330 mt
->msaa_layout
== INTEL_MSAA_LAYOUT_CMS
)) {
331 assert(physical_layer
% mt
->num_samples
== 0);
332 return physical_layer
/ mt
->num_samples
;
334 return physical_layer
;
339 * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
340 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
341 * the physical layer holding sample 0. So, for example, if
342 * src_mt->num_samples == 4, then logical layer n corresponds to src_layer ==
346 brw_blorp_blit_miptrees(struct brw_context
*brw
,
347 struct intel_mipmap_tree
*src_mt
,
348 unsigned src_level
, unsigned src_layer
,
349 mesa_format src_format
, int src_swizzle
,
350 struct intel_mipmap_tree
*dst_mt
,
351 unsigned dst_level
, unsigned dst_layer
,
352 mesa_format dst_format
,
353 float src_x0
, float src_y0
,
354 float src_x1
, float src_y1
,
355 float dst_x0
, float dst_y0
,
356 float dst_x1
, float dst_y1
,
357 GLenum filter
, bool mirror_x
, bool mirror_y
,
358 bool decode_srgb
, bool encode_srgb
)
360 /* Blorp operates in logical layers */
361 src_layer
= physical_to_logical_layer(src_mt
, src_layer
);
362 dst_layer
= physical_to_logical_layer(dst_mt
, dst_layer
);
364 DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f)"
365 "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
367 src_mt
->num_samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
368 src_level
, src_layer
, src_x0
, src_y0
, src_x1
, src_y1
,
369 dst_mt
->num_samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
370 dst_level
, dst_layer
, dst_x0
, dst_y0
, dst_x1
, dst_y1
,
373 if (!decode_srgb
&& _mesa_get_format_color_encoding(src_format
) == GL_SRGB
)
374 src_format
= _mesa_get_srgb_format_linear(src_format
);
376 if (!encode_srgb
&& _mesa_get_format_color_encoding(dst_format
) == GL_SRGB
)
377 dst_format
= _mesa_get_srgb_format_linear(dst_format
);
379 /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F
380 * texture, the above code configures the source format for L32_FLOAT or
381 * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge,
382 * the SAMPLE message appears to handle multisampled L32_FLOAT and
383 * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work
384 * around the problem by using a source format of R32_FLOAT. This
385 * shouldn't affect rendering correctness, since the destination format is
386 * R32_FLOAT, so only the contents of the red channel matters.
389 src_mt
->num_samples
> 1 && dst_mt
->num_samples
<= 1 &&
390 src_mt
->format
== dst_mt
->format
&&
391 (dst_format
== MESA_FORMAT_L_FLOAT32
||
392 dst_format
== MESA_FORMAT_I_FLOAT32
)) {
393 src_format
= dst_format
= MESA_FORMAT_R_FLOAT32
;
396 uint32_t src_usage_flags
= (1 << ISL_AUX_USAGE_MCS
);
397 if (src_format
== src_mt
->format
)
398 src_usage_flags
|= (1 << ISL_AUX_USAGE_CCS_E
);
400 uint32_t dst_usage_flags
= (1 << ISL_AUX_USAGE_MCS
);
401 if (dst_format
== dst_mt
->format
) {
402 dst_usage_flags
|= (1 << ISL_AUX_USAGE_CCS_E
) |
403 (1 << ISL_AUX_USAGE_CCS_D
);
406 struct isl_surf tmp_surfs
[4];
407 struct blorp_surf src_surf
, dst_surf
;
408 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, false, src_usage_flags
,
409 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
410 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, true, dst_usage_flags
,
411 &dst_level
, dst_layer
, 1, &tmp_surfs
[2]);
413 struct isl_swizzle src_isl_swizzle
= {
414 .r
= swizzle_to_scs(GET_SWZ(src_swizzle
, 0)),
415 .g
= swizzle_to_scs(GET_SWZ(src_swizzle
, 1)),
416 .b
= swizzle_to_scs(GET_SWZ(src_swizzle
, 2)),
417 .a
= swizzle_to_scs(GET_SWZ(src_swizzle
, 3)),
420 struct blorp_batch batch
;
421 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
422 blorp_blit(&batch
, &src_surf
, src_level
, src_layer
,
423 brw_blorp_to_isl_format(brw
, src_format
, false), src_isl_swizzle
,
424 &dst_surf
, dst_level
, dst_layer
,
425 brw_blorp_to_isl_format(brw
, dst_format
, true),
426 ISL_SWIZZLE_IDENTITY
,
427 src_x0
, src_y0
, src_x1
, src_y1
,
428 dst_x0
, dst_y0
, dst_x1
, dst_y1
,
429 filter
, mirror_x
, mirror_y
);
430 blorp_batch_finish(&batch
);
434 brw_blorp_copy_miptrees(struct brw_context
*brw
,
435 struct intel_mipmap_tree
*src_mt
,
436 unsigned src_level
, unsigned src_layer
,
437 struct intel_mipmap_tree
*dst_mt
,
438 unsigned dst_level
, unsigned dst_layer
,
439 unsigned src_x
, unsigned src_y
,
440 unsigned dst_x
, unsigned dst_y
,
441 unsigned src_width
, unsigned src_height
)
443 DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d"
444 "to %dx %s mt %p %d %d (%d,%d)\n",
446 src_mt
->num_samples
, _mesa_get_format_name(src_mt
->format
), src_mt
,
447 src_level
, src_layer
, src_x
, src_y
, src_width
, src_height
,
448 dst_mt
->num_samples
, _mesa_get_format_name(dst_mt
->format
), dst_mt
,
449 dst_level
, dst_layer
, dst_x
, dst_y
);
451 struct isl_surf tmp_surfs
[4];
452 struct blorp_surf src_surf
, dst_surf
;
453 blorp_surf_for_miptree(brw
, &src_surf
, src_mt
, false,
454 (1 << ISL_AUX_USAGE_MCS
),
455 &src_level
, src_layer
, 1, &tmp_surfs
[0]);
456 blorp_surf_for_miptree(brw
, &dst_surf
, dst_mt
, true,
457 (1 << ISL_AUX_USAGE_MCS
),
458 &dst_level
, dst_layer
, 1, &tmp_surfs
[2]);
460 struct blorp_batch batch
;
461 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
462 blorp_copy(&batch
, &src_surf
, src_level
, src_layer
,
463 &dst_surf
, dst_level
, dst_layer
,
464 src_x
, src_y
, dst_x
, dst_y
, src_width
, src_height
);
465 blorp_batch_finish(&batch
);
468 static struct intel_mipmap_tree
*
469 find_miptree(GLbitfield buffer_bit
, struct intel_renderbuffer
*irb
)
471 struct intel_mipmap_tree
*mt
= irb
->mt
;
472 if (buffer_bit
== GL_STENCIL_BUFFER_BIT
&& mt
->stencil_mt
)
478 blorp_get_texture_swizzle(const struct intel_renderbuffer
*irb
)
480 return irb
->Base
.Base
._BaseFormat
== GL_RGB
?
481 MAKE_SWIZZLE4(SWIZZLE_X
, SWIZZLE_Y
, SWIZZLE_Z
, SWIZZLE_ONE
) :
486 do_blorp_blit(struct brw_context
*brw
, GLbitfield buffer_bit
,
487 struct intel_renderbuffer
*src_irb
, mesa_format src_format
,
488 struct intel_renderbuffer
*dst_irb
, mesa_format dst_format
,
489 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
490 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
491 GLenum filter
, bool mirror_x
, bool mirror_y
)
493 const struct gl_context
*ctx
= &brw
->ctx
;
495 /* Find source/dst miptrees */
496 struct intel_mipmap_tree
*src_mt
= find_miptree(buffer_bit
, src_irb
);
497 struct intel_mipmap_tree
*dst_mt
= find_miptree(buffer_bit
, dst_irb
);
499 const bool do_srgb
= ctx
->Color
.sRGBEnabled
;
502 brw_blorp_blit_miptrees(brw
,
503 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
504 src_format
, blorp_get_texture_swizzle(src_irb
),
505 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
,
507 srcX0
, srcY0
, srcX1
, srcY1
,
508 dstX0
, dstY0
, dstX1
, dstY1
,
509 filter
, mirror_x
, mirror_y
,
512 dst_irb
->need_downsample
= true;
516 try_blorp_blit(struct brw_context
*brw
,
517 const struct gl_framebuffer
*read_fb
,
518 const struct gl_framebuffer
*draw_fb
,
519 GLfloat srcX0
, GLfloat srcY0
, GLfloat srcX1
, GLfloat srcY1
,
520 GLfloat dstX0
, GLfloat dstY0
, GLfloat dstX1
, GLfloat dstY1
,
521 GLenum filter
, GLbitfield buffer_bit
)
523 struct gl_context
*ctx
= &brw
->ctx
;
525 /* Sync up the state of window system buffers. We need to do this before
526 * we go looking for the buffers.
528 intel_prepare_render(brw
);
530 bool mirror_x
, mirror_y
;
531 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
532 &srcX0
, &srcY0
, &srcX1
, &srcY1
,
533 &dstX0
, &dstY0
, &dstX1
, &dstY1
,
534 &mirror_x
, &mirror_y
))
538 struct intel_renderbuffer
*src_irb
;
539 struct intel_renderbuffer
*dst_irb
;
540 struct intel_mipmap_tree
*src_mt
;
541 struct intel_mipmap_tree
*dst_mt
;
542 switch (buffer_bit
) {
543 case GL_COLOR_BUFFER_BIT
:
544 src_irb
= intel_renderbuffer(read_fb
->_ColorReadBuffer
);
545 for (unsigned i
= 0; i
< draw_fb
->_NumColorDrawBuffers
; ++i
) {
546 dst_irb
= intel_renderbuffer(draw_fb
->_ColorDrawBuffers
[i
]);
548 do_blorp_blit(brw
, buffer_bit
,
549 src_irb
, src_irb
->Base
.Base
.Format
,
550 dst_irb
, dst_irb
->Base
.Base
.Format
,
551 srcX0
, srcY0
, srcX1
, srcY1
,
552 dstX0
, dstY0
, dstX1
, dstY1
,
553 filter
, mirror_x
, mirror_y
);
556 case GL_DEPTH_BUFFER_BIT
:
558 intel_renderbuffer(read_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
560 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_DEPTH
].Renderbuffer
);
561 src_mt
= find_miptree(buffer_bit
, src_irb
);
562 dst_mt
= find_miptree(buffer_bit
, dst_irb
);
564 /* We can't handle format conversions between Z24 and other formats
565 * since we have to lie about the surface format. See the comments in
566 * brw_blorp_surface_info::set().
568 if ((src_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
) !=
569 (dst_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
))
572 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
573 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
574 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
575 filter
, mirror_x
, mirror_y
);
577 case GL_STENCIL_BUFFER_BIT
:
579 intel_renderbuffer(read_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
581 intel_renderbuffer(draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
);
582 do_blorp_blit(brw
, buffer_bit
, src_irb
, MESA_FORMAT_NONE
,
583 dst_irb
, MESA_FORMAT_NONE
, srcX0
, srcY0
,
584 srcX1
, srcY1
, dstX0
, dstY0
, dstX1
, dstY1
,
585 filter
, mirror_x
, mirror_y
);
588 unreachable("not reached");
595 brw_blorp_copytexsubimage(struct brw_context
*brw
,
596 struct gl_renderbuffer
*src_rb
,
597 struct gl_texture_image
*dst_image
,
599 int srcX0
, int srcY0
,
600 int dstX0
, int dstY0
,
601 int width
, int height
)
603 struct gl_context
*ctx
= &brw
->ctx
;
604 struct intel_renderbuffer
*src_irb
= intel_renderbuffer(src_rb
);
605 struct intel_texture_image
*intel_image
= intel_texture_image(dst_image
);
607 /* No pixel transfer operations (zoom, bias, mapping), just a blit */
608 if (brw
->ctx
._ImageTransferState
)
611 /* Sync up the state of window system buffers. We need to do this before
612 * we go looking at the src renderbuffer's miptree.
614 intel_prepare_render(brw
);
616 struct intel_mipmap_tree
*src_mt
= src_irb
->mt
;
617 struct intel_mipmap_tree
*dst_mt
= intel_image
->mt
;
619 /* There is support for only up to eight samples. */
620 if (src_mt
->num_samples
> 8 || dst_mt
->num_samples
> 8)
623 /* BLORP is only supported from Gen6 onwards. */
627 if (_mesa_get_format_base_format(src_rb
->Format
) !=
628 _mesa_get_format_base_format(dst_image
->TexFormat
)) {
632 /* We can't handle format conversions between Z24 and other formats since
633 * we have to lie about the surface format. See the comments in
634 * brw_blorp_surface_info::set().
636 if ((src_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
) !=
637 (dst_mt
->format
== MESA_FORMAT_Z24_UNORM_X8_UINT
)) {
641 if (!brw
->format_supported_as_render_target
[dst_image
->TexFormat
])
644 /* Source clipping shouldn't be necessary, since copytexsubimage (in
645 * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
648 * Destination clipping shouldn't be necessary since the restrictions on
649 * glCopyTexSubImage prevent the user from specifying a destination rectangle
650 * that falls outside the bounds of the destination texture.
651 * See error_check_subtexture_dimensions().
654 int srcY1
= srcY0
+ height
;
655 int srcX1
= srcX0
+ width
;
656 int dstX1
= dstX0
+ width
;
657 int dstY1
= dstY0
+ height
;
659 /* Account for the fact that in the system framebuffer, the origin is at
662 bool mirror_y
= false;
663 if (_mesa_is_winsys_fbo(ctx
->ReadBuffer
)) {
664 GLint tmp
= src_rb
->Height
- srcY0
;
665 srcY0
= src_rb
->Height
- srcY1
;
670 /* Account for face selection and texture view MinLayer */
671 int dst_slice
= slice
+ dst_image
->TexObject
->MinLayer
+ dst_image
->Face
;
672 int dst_level
= dst_image
->Level
+ dst_image
->TexObject
->MinLevel
;
674 brw_blorp_blit_miptrees(brw
,
675 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
676 src_rb
->Format
, blorp_get_texture_swizzle(src_irb
),
677 dst_mt
, dst_level
, dst_slice
,
678 dst_image
->TexFormat
,
679 srcX0
, srcY0
, srcX1
, srcY1
,
680 dstX0
, dstY0
, dstX1
, dstY1
,
681 GL_NEAREST
, false, mirror_y
,
684 /* If we're copying to a packed depth stencil texture and the source
685 * framebuffer has separate stencil, we need to also copy the stencil data
688 src_rb
= ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
689 if (_mesa_get_format_bits(dst_image
->TexFormat
, GL_STENCIL_BITS
) > 0 &&
691 src_irb
= intel_renderbuffer(src_rb
);
692 src_mt
= src_irb
->mt
;
694 if (src_mt
->stencil_mt
)
695 src_mt
= src_mt
->stencil_mt
;
696 if (dst_mt
->stencil_mt
)
697 dst_mt
= dst_mt
->stencil_mt
;
699 if (src_mt
!= dst_mt
) {
700 brw_blorp_blit_miptrees(brw
,
701 src_mt
, src_irb
->mt_level
, src_irb
->mt_layer
,
703 blorp_get_texture_swizzle(src_irb
),
704 dst_mt
, dst_level
, dst_slice
,
706 srcX0
, srcY0
, srcX1
, srcY1
,
707 dstX0
, dstY0
, dstX1
, dstY1
,
708 GL_NEAREST
, false, mirror_y
,
718 brw_blorp_framebuffer(struct brw_context
*brw
,
719 struct gl_framebuffer
*readFb
,
720 struct gl_framebuffer
*drawFb
,
721 GLint srcX0
, GLint srcY0
, GLint srcX1
, GLint srcY1
,
722 GLint dstX0
, GLint dstY0
, GLint dstX1
, GLint dstY1
,
723 GLbitfield mask
, GLenum filter
)
725 /* BLORP is not supported before Gen6. */
729 static GLbitfield buffer_bits
[] = {
732 GL_STENCIL_BUFFER_BIT
,
735 for (unsigned int i
= 0; i
< ARRAY_SIZE(buffer_bits
); ++i
) {
736 if ((mask
& buffer_bits
[i
]) &&
737 try_blorp_blit(brw
, readFb
, drawFb
,
738 srcX0
, srcY0
, srcX1
, srcY1
,
739 dstX0
, dstY0
, dstX1
, dstY1
,
740 filter
, buffer_bits
[i
])) {
741 mask
&= ~buffer_bits
[i
];
749 set_write_disables(const struct intel_renderbuffer
*irb
,
750 const GLubyte
*color_mask
, bool *color_write_disable
)
752 /* Format information in the renderbuffer represents the requirements
753 * given by the client. There are cases where the backing miptree uses,
754 * for example, RGBA to represent RGBX. Since the client is only expecting
755 * RGB we can treat alpha as not used and write whatever we like into it.
757 const GLenum base_format
= irb
->Base
.Base
._BaseFormat
;
758 const int components
= _mesa_base_format_component_count(base_format
);
759 bool disables
= false;
761 assert(components
> 0);
763 for (int i
= 0; i
< components
; i
++) {
764 color_write_disable
[i
] = !color_mask
[i
];
765 disables
= disables
|| !color_mask
[i
];
772 irb_logical_mt_layer(struct intel_renderbuffer
*irb
)
774 return physical_to_logical_layer(irb
->mt
, irb
->mt_layer
);
778 do_single_blorp_clear(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
779 struct gl_renderbuffer
*rb
, unsigned buf
,
780 bool partial_clear
, bool encode_srgb
)
782 struct gl_context
*ctx
= &brw
->ctx
;
783 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
784 mesa_format format
= irb
->mt
->format
;
785 uint32_t x0
, x1
, y0
, y1
;
787 if (!encode_srgb
&& _mesa_get_format_color_encoding(format
) == GL_SRGB
)
788 format
= _mesa_get_srgb_format_linear(format
);
796 y0
= rb
->Height
- fb
->_Ymax
;
797 y1
= rb
->Height
- fb
->_Ymin
;
800 /* If the clear region is empty, just return. */
801 if (x0
== x1
|| y0
== y1
)
804 bool can_fast_clear
= !partial_clear
;
806 bool color_write_disable
[4] = { false, false, false, false };
807 if (set_write_disables(irb
, ctx
->Color
.ColorMask
[buf
], color_write_disable
))
808 can_fast_clear
= false;
810 if (irb
->mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_NO_MCS
||
811 !brw_is_color_fast_clear_compatible(brw
, irb
->mt
, &ctx
->Color
.ClearColor
))
812 can_fast_clear
= false;
814 const bool is_lossless_compressed
= intel_miptree_is_lossless_compressed(
817 if (can_fast_clear
) {
818 /* Record the clear color in the miptree so that it will be
819 * programmed in SURFACE_STATE by later rendering and resolve
822 const bool color_updated
= brw_meta_set_fast_clear_color(
823 brw
, irb
->mt
, &ctx
->Color
.ClearColor
);
825 /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
826 * is redundant and can be skipped.
828 if (!color_updated
&&
829 irb
->mt
->fast_clear_state
== INTEL_FAST_CLEAR_STATE_CLEAR
)
832 /* If the MCS buffer hasn't been allocated yet, we need to allocate
835 if (!irb
->mt
->mcs_buf
) {
836 assert(!is_lossless_compressed
);
837 if (!intel_miptree_alloc_non_msrt_mcs(brw
, irb
->mt
, false)) {
838 /* MCS allocation failed--probably this will only happen in
839 * out-of-memory conditions. But in any case, try to recover
840 * by falling back to a non-blorp clear technique.
847 const unsigned logical_layer
= irb_logical_mt_layer(irb
);
848 const unsigned num_layers
= fb
->MaxNumLayers
? irb
->layer_count
: 1;
850 /* We can't setup the blorp_surf until we've allocated the MCS above */
851 struct isl_surf isl_tmp
[2];
852 struct blorp_surf surf
;
853 unsigned level
= irb
->mt_level
;
854 blorp_surf_for_miptree(brw
, &surf
, irb
->mt
, true,
855 (1 << ISL_AUX_USAGE_MCS
) |
856 (1 << ISL_AUX_USAGE_CCS_E
) |
857 (1 << ISL_AUX_USAGE_CCS_D
),
858 &level
, logical_layer
, num_layers
, isl_tmp
);
860 if (can_fast_clear
) {
861 DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__
,
862 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
864 struct blorp_batch batch
;
865 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
866 blorp_fast_clear(&batch
, &surf
,
867 (enum isl_format
)brw
->render_target_format
[format
],
868 level
, logical_layer
, num_layers
,
870 blorp_batch_finish(&batch
);
872 /* Now that the fast clear has occurred, put the buffer in
873 * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
876 irb
->mt
->fast_clear_state
= INTEL_FAST_CLEAR_STATE_CLEAR
;
878 DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__
,
879 irb
->mt
, irb
->mt_level
, irb
->mt_layer
, num_layers
);
881 union isl_color_value clear_color
;
882 memcpy(clear_color
.f32
, ctx
->Color
.ClearColor
.f
, sizeof(float) * 4);
884 struct blorp_batch batch
;
885 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
886 blorp_clear(&batch
, &surf
,
887 (enum isl_format
)brw
->render_target_format
[format
],
888 ISL_SWIZZLE_IDENTITY
,
889 level
, irb_logical_mt_layer(irb
), num_layers
,
891 clear_color
, color_write_disable
);
892 blorp_batch_finish(&batch
);
899 brw_blorp_clear_color(struct brw_context
*brw
, struct gl_framebuffer
*fb
,
900 GLbitfield mask
, bool partial_clear
, bool encode_srgb
)
902 for (unsigned buf
= 0; buf
< fb
->_NumColorDrawBuffers
; buf
++) {
903 struct gl_renderbuffer
*rb
= fb
->_ColorDrawBuffers
[buf
];
904 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
906 /* Only clear the buffers present in the provided mask */
907 if (((1 << fb
->_ColorDrawBufferIndexes
[buf
]) & mask
) == 0)
910 /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
911 * the framebuffer can be complete with some attachments missing. In
912 * this case the _ColorDrawBuffers pointer will be NULL.
917 if (!do_single_blorp_clear(brw
, fb
, rb
, buf
, partial_clear
,
922 irb
->need_downsample
= true;
929 brw_blorp_resolve_color(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
)
931 DBG("%s to mt %p\n", __FUNCTION__
, mt
);
933 const mesa_format format
= _mesa_get_srgb_format_linear(mt
->format
);
935 struct isl_surf isl_tmp
[2];
936 struct blorp_surf surf
;
938 blorp_surf_for_miptree(brw
, &surf
, mt
, true,
939 (1 << ISL_AUX_USAGE_CCS_E
) |
940 (1 << ISL_AUX_USAGE_CCS_D
),
941 &level
, 0 /* start_layer */, 1 /* num_layers */,
944 enum blorp_fast_clear_op resolve_op
;
946 if (surf
.aux_usage
== ISL_AUX_USAGE_CCS_E
)
947 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_FULL
;
949 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL
;
951 assert(surf
.aux_usage
== ISL_AUX_USAGE_CCS_D
);
952 /* Broadwell and earlier do not have a partial resolve */
953 resolve_op
= BLORP_FAST_CLEAR_OP_RESOLVE_FULL
;
956 struct blorp_batch batch
;
957 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
958 blorp_ccs_resolve(&batch
, &surf
, 0 /* level */, 0 /* layer */,
959 brw_blorp_to_isl_format(brw
, format
, true),
961 blorp_batch_finish(&batch
);
963 mt
->fast_clear_state
= INTEL_FAST_CLEAR_STATE_RESOLVED
;
967 gen6_blorp_hiz_exec(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
968 unsigned int level
, unsigned int layer
, enum blorp_hiz_op op
)
970 assert(intel_miptree_level_has_hiz(mt
, level
));
972 struct isl_surf isl_tmp
[2];
973 struct blorp_surf surf
;
974 blorp_surf_for_miptree(brw
, &surf
, mt
, true, (1 << ISL_AUX_USAGE_HIZ
),
975 &level
, layer
, 1, isl_tmp
);
977 struct blorp_batch batch
;
978 blorp_batch_init(&brw
->blorp
, &batch
, brw
, 0);
979 blorp_gen6_hiz_op(&batch
, &surf
, level
, layer
, op
);
980 blorp_batch_finish(&batch
);
984 * Perform a HiZ or depth resolve operation.
986 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
987 * PRM, Volume 1, Part 2:
988 * - 7.5.3.1 Depth Buffer Clear
989 * - 7.5.3.2 Depth Buffer Resolve
990 * - 7.5.3.3 Hierarchical Depth Buffer Resolve
993 intel_hiz_exec(struct brw_context
*brw
, struct intel_mipmap_tree
*mt
,
994 unsigned int level
, unsigned int layer
, enum blorp_hiz_op op
)
996 const char *opname
= NULL
;
999 case BLORP_HIZ_OP_DEPTH_RESOLVE
:
1000 opname
= "depth resolve";
1002 case BLORP_HIZ_OP_HIZ_RESOLVE
:
1003 opname
= "hiz ambiguate";
1005 case BLORP_HIZ_OP_DEPTH_CLEAR
:
1006 opname
= "depth clear";
1008 case BLORP_HIZ_OP_NONE
:
1013 DBG("%s %s to mt %p level %d layer %d\n",
1014 __func__
, opname
, mt
, level
, layer
);
1016 if (brw
->gen
>= 8) {
1017 gen8_hiz_exec(brw
, mt
, level
, layer
, op
);
1019 gen6_blorp_hiz_exec(brw
, mt
, level
, layer
, op
);