/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>

#include "brw_context.h"
#include "intel_fbo.h"
#include "brw_meta_util.h"
#include "brw_state.h"
#include "main/blend.h"
#include "main/fbobject.h"
#include "util/format_srgb.h"

/**
 * Helper function for handling mirror image blits.
 *
 * If coord0 > coord1, swap them and invert the "mirror" boolean, so that on
 * return coord0 <= coord1 always holds.
 *
 * \param mirror  in/out flag, toggled when the two coordinates are swapped
 * \param coord0  in/out first coordinate of the pair
 * \param coord1  in/out second coordinate of the pair
 */
static inline void
fixup_mirroring(bool *mirror, float *coord0, float *coord1)
{
   if (*coord0 > *coord1) {
      const float tmp = *coord0;

      *coord0 = *coord1;
      *coord1 = tmp;
      *mirror = !*mirror;
   }
}

/**
 * Compute the number of pixels to clip for each side of a rect
 *
 * \param x0 The rect's left coordinate
 * \param y0 The rect's bottom coordinate
 * \param x1 The rect's right coordinate
 * \param y1 The rect's top coordinate
 * \param min_x The clipping region's left coordinate
 * \param min_y The clipping region's bottom coordinate
 * \param max_x The clipping region's right coordinate
 * \param max_y The clipping region's top coordinate
 * \param clipped_x0 The number of pixels to clip from the left side
 * \param clipped_y0 The number of pixels to clip from the bottom side
 * \param clipped_x1 The number of pixels to clip from the right side
 * \param clipped_y1 The number of pixels to clip from the top side
 *
 * The four outputs are only written when the function returns true; each is
 * zero for a side that already lies inside the clipping region.
 *
 * \return false if we clip everything away, true otherwise
 */
static inline bool
compute_pixels_clipped(float x0, float y0, float x1, float y1,
                       float min_x, float min_y, float max_x, float max_y,
                       float *clipped_x0, float *clipped_y0,
                       float *clipped_x1, float *clipped_y1)
{
   /* If we are going to clip everything away, stop.  This also rejects an
    * inverted clip region or an inverted rect.
    */
   if (!(min_x <= max_x &&
         min_y <= max_y &&
         x0 <= max_x &&
         y0 <= max_y &&
         min_x <= x1 &&
         min_y <= y1 &&
         x0 <= x1 &&
         y0 <= y1)) {
      return false;
   }

   *clipped_x0 = (x0 < min_x) ? min_x - x0 : 0;
   *clipped_x1 = (max_x < x1) ? x1 - max_x : 0;

   *clipped_y0 = (y0 < min_y) ? min_y - y0 : 0;
   *clipped_y1 = (max_y < y1) ? y1 - max_y : 0;

   return true;
}

/**
 * Clips a coordinate (left, right, top or bottom) for the src or dst rect
 * (whichever requires the largest clip) and adjusts the coordinate
 * for the other rect accordingly.
 *
 * \param mirror true if mirroring is required
 * \param src the source rect coordinate (for example srcX0)
 * \param dst0 the dst rect coordinate (for example dstX0)
 * \param dst1 the opposite dst rect coordinate (for example dstX1)
 * \param clipped_src0 number of pixels to clip from the src coordinate
 * \param clipped_dst0 number of pixels to clip from the dst coordinate
 * \param clipped_dst1 number of pixels to clip from the opposite dst coordinate
 * \param scale the src vs dst scale involved for that coordinate
 * \param isLeftOrBottom true if we are clipping the left or bottom sides
 *        of the rect.
 */
static inline void
clip_coordinates(bool mirror,
                 float *src, float *dst0, float *dst1,
                 float clipped_src0,
                 float clipped_dst0,
                 float clipped_dst1,
                 float scale,
                 bool isLeftOrBottom)
{
   /* When clipping we need to add or subtract pixels from the original
    * coordinates depending on whether we are acting on the left/bottom
    * or right/top sides of the rect respectively. We assume we have to
    * add them in the code below, and multiply by -1 when we should
    * subtract.
    */
   int mult = isLeftOrBottom ? 1 : -1;

   if (!mirror) {
      /* The src side pairs with the same side of the dst rect. */
      if (clipped_src0 >= clipped_dst0 * scale) {
         *src += clipped_src0 * mult;
         *dst0 += clipped_src0 / scale * mult;
      } else {
         *dst0 += clipped_dst0 * mult;
         *src += clipped_dst0 * scale * mult;
      }
   } else {
      /* Mirrored: the src side pairs with the opposite dst side, which
       * therefore moves in the opposite direction.
       */
      if (clipped_src0 >= clipped_dst1 * scale) {
         *src += clipped_src0 * mult;
         *dst1 -= clipped_src0 / scale * mult;
      } else {
         *dst1 -= clipped_dst1 * mult;
         *src += clipped_dst1 * scale * mult;
      }
   }
}

157 brw_meta_mirror_clip_and_scissor(const struct gl_context
*ctx
,
158 const struct gl_framebuffer
*read_fb
,
159 const struct gl_framebuffer
*draw_fb
,
160 GLfloat
*srcX0
, GLfloat
*srcY0
,
161 GLfloat
*srcX1
, GLfloat
*srcY1
,
162 GLfloat
*dstX0
, GLfloat
*dstY0
,
163 GLfloat
*dstX1
, GLfloat
*dstY1
,
164 bool *mirror_x
, bool *mirror_y
)
169 /* Detect if the blit needs to be mirrored */
170 fixup_mirroring(mirror_x
, srcX0
, srcX1
);
171 fixup_mirroring(mirror_x
, dstX0
, dstX1
);
172 fixup_mirroring(mirror_y
, srcY0
, srcY1
);
173 fixup_mirroring(mirror_y
, dstY0
, dstY1
);
175 /* Compute number of pixels to clip for each side of both rects. Return
176 * early if we are going to clip everything away.
187 if (!compute_pixels_clipped(*srcX0
, *srcY0
, *srcX1
, *srcY1
,
188 0, 0, read_fb
->Width
, read_fb
->Height
,
189 &clip_src_x0
, &clip_src_y0
, &clip_src_x1
, &clip_src_y1
))
192 if (!compute_pixels_clipped(*dstX0
, *dstY0
, *dstX1
, *dstY1
,
193 draw_fb
->_Xmin
, draw_fb
->_Ymin
, draw_fb
->_Xmax
, draw_fb
->_Ymax
,
194 &clip_dst_x0
, &clip_dst_y0
, &clip_dst_x1
, &clip_dst_y1
))
197 /* When clipping any of the two rects we need to adjust the coordinates in
198 * the other rect considering the scaling factor involved. To obtain the best
199 * precision we want to make sure that we only clip once per side to avoid
200 * accumulating errors due to the scaling adjustment.
202 * For example, if srcX0 and dstX0 need both to be clipped we want to avoid
203 * the situation where we clip srcX0 first, then adjust dstX0 accordingly
204 * but then we realize that the resulting dstX0 still needs to be clipped,
205 * so we clip dstX0 and adjust srcX0 again. Because we are applying scaling
206 * factors to adjust the coordinates in each clipping pass we lose some
207 * precision and that can affect the results of the blorp blit operation
208 * slightly. What we want to do here is detect the rect that we should
209 * clip first for each side so that when we adjust the other rect we ensure
210 * the resulting coordinate does not need to be clipped again.
212 * The code below implements this by comparing the number of pixels that
213 * we need to clip for each side of both rects considering the scales
214 * involved. For example, clip_src_x0 represents the number of pixels to be
215 * clipped for the src rect's left side, so if clip_src_x0 = 5,
216 * clip_dst_x0 = 4 and scaleX = 2 it means that we are clipping more from
217 * the dst rect so we should clip dstX0 only and adjust srcX0. This is
218 * because clipping 4 pixels in the dst is equivalent to clipping
219 * 4 * 2 = 8 > 5 in the src.
222 float scaleX
= (float) (*srcX1
- *srcX0
) / (*dstX1
- *dstX0
);
223 float scaleY
= (float) (*srcY1
- *srcY0
) / (*dstY1
- *dstY0
);
226 clip_coordinates(*mirror_x
,
228 clip_src_x0
, clip_dst_x0
, clip_dst_x1
,
231 /* Clip right side */
232 clip_coordinates(*mirror_x
,
234 clip_src_x1
, clip_dst_x1
, clip_dst_x0
,
237 /* Clip bottom side */
238 clip_coordinates(*mirror_y
,
240 clip_src_y0
, clip_dst_y0
, clip_dst_y1
,
244 clip_coordinates(*mirror_y
,
246 clip_src_y1
, clip_dst_y1
, clip_dst_y0
,
249 /* Account for the fact that in the system framebuffer, the origin is at
252 if (_mesa_is_winsys_fbo(read_fb
)) {
253 GLint tmp
= read_fb
->Height
- *srcY0
;
254 *srcY0
= read_fb
->Height
- *srcY1
;
256 *mirror_y
= !*mirror_y
;
258 if (_mesa_is_winsys_fbo(draw_fb
)) {
259 GLint tmp
= draw_fb
->Height
- *dstY0
;
260 *dstY0
= draw_fb
->Height
- *dstY1
;
262 *mirror_y
= !*mirror_y
;
269 * Creates a new named renderbuffer that wraps the first slice
270 * of an existing miptree.
272 * Clobbers the current renderbuffer binding (ctx->CurrentRenderbuffer).
274 struct gl_renderbuffer
*
275 brw_get_rb_for_slice(struct brw_context
*brw
,
276 struct intel_mipmap_tree
*mt
,
277 unsigned level
, unsigned layer
, bool flat
)
279 struct gl_context
*ctx
= &brw
->ctx
;
280 struct gl_renderbuffer
*rb
= ctx
->Driver
.NewRenderbuffer(ctx
, 0xDEADBEEF);
281 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
284 rb
->Format
= mt
->format
;
285 rb
->_BaseFormat
= _mesa_get_format_base_format(mt
->format
);
287 /* Program takes care of msaa and mip-level access manually for stencil.
288 * The surface is also treated as Y-tiled instead of as W-tiled calling for
289 * twice the width and half the height in dimensions.
292 const unsigned halign_stencil
= 8;
295 rb
->Width
= ALIGN(mt
->total_width
, halign_stencil
) * 2;
296 rb
->Height
= (mt
->total_height
/ mt
->physical_depth0
) / 2;
299 rb
->NumSamples
= mt
->num_samples
;
300 rb
->Width
= mt
->logical_width0
;
301 rb
->Height
= mt
->logical_height0
;
302 irb
->mt_level
= level
;
305 irb
->mt_layer
= layer
;
307 intel_miptree_reference(&irb
->mt
, mt
);
313 * Determine if fast color clear supports the given clear color.
315 * Fast color clear can only clear to color values of 1.0 or 0.0. At the
316 * moment we only support floating point, unorm, and snorm buffers.
319 brw_is_color_fast_clear_compatible(struct brw_context
*brw
,
320 const struct intel_mipmap_tree
*mt
,
321 const union gl_color_union
*color
)
323 const struct gl_context
*ctx
= &brw
->ctx
;
325 /* If we're mapping the render format to a different format than the
326 * format we use for texturing then it is a bit questionable whether it
327 * should be possible to use a fast clear. Although we only actually
328 * render using a renderable format, without the override workaround it
329 * wouldn't be possible to have a non-renderable surface in a fast clear
330 * state so the hardware probably legitimately doesn't need to support
331 * this case. At least on Gen9 this really does seem to cause problems.
334 brw_format_for_mesa_format(mt
->format
) !=
335 brw
->render_target_format
[mt
->format
])
338 /* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When
339 * GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be
340 * resolved in intel_update_state. In that case it's pointless to do a
341 * fast clear because it's very likely to be immediately resolved.
344 mt
->num_samples
<= 1 &&
345 ctx
->Color
.sRGBEnabled
&&
346 _mesa_get_srgb_format_linear(mt
->format
) != mt
->format
)
349 const mesa_format format
= _mesa_get_render_format(ctx
, mt
->format
);
350 if (_mesa_is_format_integer_color(format
)) {
352 perf_debug("Integer fast clear not enabled for (%s)",
353 _mesa_get_format_name(format
));
358 for (int i
= 0; i
< 4; i
++) {
359 if (!_mesa_format_has_color_component(format
, i
)) {
364 color
->f
[i
] != 0.0f
&& color
->f
[i
] != 1.0f
) {
372 * Convert the given color to a bitfield suitable for ORing into DWORD 7 of
373 * SURFACE_STATE (DWORD 12-15 on SKL+).
375 * Returned boolean tells if the given color differs from the stored.
378 brw_meta_set_fast_clear_color(struct brw_context
*brw
,
379 struct intel_mipmap_tree
*mt
,
380 const union gl_color_union
*color
)
382 union gl_color_union override_color
= *color
;
384 /* The sampler doesn't look at the format of the surface when the fast
385 * clear color is used so we need to implement luminance, intensity and
386 * missing components manually.
388 switch (_mesa_get_format_base_format(mt
->format
)) {
390 override_color
.ui
[3] = override_color
.ui
[0];
393 case GL_LUMINANCE_ALPHA
:
394 override_color
.ui
[1] = override_color
.ui
[0];
395 override_color
.ui
[2] = override_color
.ui
[0];
398 for (int i
= 0; i
< 3; i
++) {
399 if (!_mesa_format_has_color_component(mt
->format
, i
))
400 override_color
.ui
[i
] = 0;
405 if (!_mesa_format_has_color_component(mt
->format
, 3)) {
406 if (_mesa_is_format_integer_color(mt
->format
))
407 override_color
.ui
[3] = 1;
409 override_color
.f
[3] = 1.0f
;
412 /* Handle linear→SRGB conversion */
413 if (brw
->ctx
.Color
.sRGBEnabled
&&
414 _mesa_get_srgb_format_linear(mt
->format
) != mt
->format
) {
415 for (int i
= 0; i
< 3; i
++) {
416 override_color
.f
[i
] =
417 util_format_linear_to_srgb_float(override_color
.f
[i
]);
423 updated
= memcmp(&mt
->gen9_fast_clear_color
, &override_color
,
424 sizeof(mt
->gen9_fast_clear_color
));
425 mt
->gen9_fast_clear_color
= override_color
;
427 const uint32_t old_color_value
= mt
->fast_clear_color_value
;
429 mt
->fast_clear_color_value
= 0;
430 for (int i
= 0; i
< 4; i
++) {
431 /* Testing for non-0 works for integer and float colors */
432 if (override_color
.f
[i
] != 0.0f
) {
433 mt
->fast_clear_color_value
|=
434 1 << (GEN7_SURFACE_CLEAR_COLOR_SHIFT
+ (3 - i
));
438 updated
= (old_color_value
!= mt
->fast_clear_color_value
);
445 brw_get_fast_clear_rect(const struct brw_context
*brw
,
446 const struct gl_framebuffer
*fb
,
447 const struct intel_mipmap_tree
* mt
,
448 unsigned *x0
, unsigned *y0
,
449 unsigned *x1
, unsigned *y1
)
451 unsigned int x_align
, y_align
;
452 unsigned int x_scaledown
, y_scaledown
;
454 /* Only single sampled surfaces need to (and actually can) be resolved. */
455 if (mt
->msaa_layout
== INTEL_MSAA_LAYOUT_NONE
||
456 intel_miptree_is_lossless_compressed(brw
, mt
)) {
457 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
458 * Target(s)", beneath the "Fast Color Clear" bullet (p327):
460 * Clear pass must have a clear rectangle that must follow
461 * alignment rules in terms of pixels and lines as shown in the
462 * table below. Further, the clear-rectangle height and width
463 * must be multiple of the following dimensions. If the height
464 * and width of the render target being cleared do not meet these
465 * requirements, an MCS buffer can be created such that it
466 * follows the requirement and covers the RT.
468 * The alignment size in the table that follows is related to the
469 * alignment size returned by intel_get_non_msrt_mcs_alignment(), but
470 * with X alignment multiplied by 16 and Y alignment multiplied by 32.
472 intel_get_non_msrt_mcs_alignment(mt
, &x_align
, &y_align
);
475 /* SKL+ line alignment requirement for Y-tiled are half those of the prior
483 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
484 * Target(s)", beneath the "Fast Color Clear" bullet (p327):
486 * In order to optimize the performance MCS buffer (when bound to
487 * 1X RT) clear similarly to MCS buffer clear for MSRT case,
488 * clear rect is required to be scaled by the following factors
489 * in the horizontal and vertical directions:
491 * The X and Y scale down factors in the table that follows are each
492 * equal to half the alignment value computed above.
494 x_scaledown
= x_align
/ 2;
495 y_scaledown
= y_align
/ 2;
497 /* From BSpec: 3D-Media-GPGPU Engine > 3D Pipeline > Pixel > Pixel
498 * Backend > MCS Buffer for Render Target(s) [DevIVB+] > Table "Color
499 * Clear of Non-MultiSampled Render Target Restrictions":
501 * Clear rectangle must be aligned to two times the number of
502 * pixels in the table shown below due to 16x16 hashing across the
508 /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for Render
509 * Target(s)", beneath the "MSAA Compression" bullet (p326):
511 * Clear pass for this case requires that scaled down primitive
512 * is sent down with upper left co-ordinate to coincide with
513 * actual rectangle being cleared. For MSAA, clear rectangle’s
514 * height and width need to as show in the following table in
515 * terms of (width,height) of the RT.
517 * MSAA Width of Clear Rect Height of Clear Rect
518 * 2X Ceil(1/8*width) Ceil(1/2*height)
519 * 4X Ceil(1/8*width) Ceil(1/2*height)
520 * 8X Ceil(1/2*width) Ceil(1/2*height)
521 * 16X width Ceil(1/2*height)
523 * The text "with upper left co-ordinate to coincide with actual
524 * rectangle being cleared" is a little confusing--it seems to imply
525 * that to clear a rectangle from (x,y) to (x+w,y+h), one needs to
526 * feed the pipeline using the rectangle (x,y) to
527 * (x+Ceil(w/N),y+Ceil(h/2)), where N is either 2 or 8 depending on
528 * the number of samples. Experiments indicate that this is not
529 * quite correct; actually, what the hardware appears to do is to
530 * align whatever rectangle is sent down the pipeline to the nearest
531 * multiple of 2x2 blocks, and then scale it up by a factor of N
532 * horizontally and 2 vertically. So the resulting alignment is 4
533 * vertically and either 4 or 16 horizontally, and the scaledown
534 * factor is 2 vertically and either 2 or 8 horizontally.
536 switch (mt
->num_samples
) {
548 unreachable("Unexpected sample count for fast clear");
551 x_align
= x_scaledown
* 2;
552 y_align
= y_scaledown
* 2;
561 *y0
= fb
->Height
- fb
->_Ymax
;
562 *y1
= fb
->Height
- fb
->_Ymin
;
565 *x0
= ROUND_DOWN_TO(*x0
, x_align
) / x_scaledown
;
566 *y0
= ROUND_DOWN_TO(*y0
, y_align
) / y_scaledown
;
567 *x1
= ALIGN(*x1
, x_align
) / x_scaledown
;
568 *y1
= ALIGN(*y1
, y_align
) / y_scaledown
;
572 brw_meta_get_buffer_rect(const struct gl_framebuffer
*fb
,
573 unsigned *x0
, unsigned *y0
,
574 unsigned *x1
, unsigned *y1
)
582 *y0
= fb
->Height
- fb
->_Ymax
;
583 *y1
= fb
->Height
- fb
->_Ymin
;
588 brw_get_resolve_rect(const struct brw_context
*brw
,
589 const struct intel_mipmap_tree
*mt
,
590 unsigned *x0
, unsigned *y0
,
591 unsigned *x1
, unsigned *y1
)
593 unsigned x_align
, y_align
;
594 unsigned x_scaledown
, y_scaledown
;
596 /* From the Ivy Bridge PRM, Vol2 Part1 11.9 "Render Target Resolve":
598 * A rectangle primitive must be scaled down by the following factors
599 * with respect to render target being resolved.
601 * The scaledown factors in the table that follows are related to the
602 * alignment size returned by intel_get_non_msrt_mcs_alignment() by a
603 * multiplier. For IVB and HSW, we divide by two, for BDW we multiply
604 * by 8 and 16. Similar to the fast clear, SKL eases the BDW vertical scaling
608 intel_get_non_msrt_mcs_alignment(mt
, &x_align
, &y_align
);
610 x_scaledown
= x_align
* 8;
611 y_scaledown
= y_align
* 8;
612 } else if (brw
->gen
>= 8) {
613 x_scaledown
= x_align
* 8;
614 y_scaledown
= y_align
* 16;
616 x_scaledown
= x_align
/ 2;
617 y_scaledown
= y_align
/ 2;
620 *x1
= ALIGN(mt
->logical_width0
, x_scaledown
) / x_scaledown
;
621 *y1
= ALIGN(mt
->logical_height0
, y_scaledown
) / y_scaledown
;