2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @file brw_meta_stencil_blit.c
27 * Implements upsampling, downsampling and scaling of stencil miptrees. The
28 * logic originally comes from brw_blorp_blit.c.
29 * Implementation creates a temporary draw framebuffer object and attaches the
30 * destination stencil buffer attachment as color attachment. Source attachment
31 * is in turn treated as a stencil texture and the glsl program used for the
32 * blitting samples it using stencil-indexing.
34 * Unfortunately as the data port does not support interleaved msaa-surfaces
35 * (stencil is always IMS), the glsl program needs to handle the writing of
36 * individual samples manually. Surface is configured as if it were single
37 * sampled (with adjusted dimensions) and the glsl program extracts the
38 * sample indices from the input coordinates for correct texturing.
40 * Target surface is also configured as Y-tiled instead of W-tiled in order
41 * to support generations 6-7. Later hardware supports W-tiled as render target
42 * and the logic here could be simplified for those.
45 #include "brw_context.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_fbo.h"
49 #include "main/blit.h"
50 #include "main/buffers.h"
51 #include "main/fbobject.h"
52 #include "main/uniforms.h"
53 #include "main/texparam.h"
54 #include "main/texobj.h"
55 #include "main/viewport.h"
56 #include "main/enable.h"
57 #include "main/blend.h"
58 #include "main/varray.h"
59 #include "main/shaderapi.h"
60 #include "util/ralloc.h"
62 #include "drivers/common/meta.h"
63 #include "brw_meta_util.h"
65 #define FILE_DEBUG_FLAG DEBUG_FBO
/**
 * Source and destination rectangles of a stencil blit together with the
 * per-axis mirroring flags.
 *
 * The x1/y1 members are exclusive bounds: the fragment program discards
 * pixels with X >= dst_x1 or Y >= dst_y1.
 */
struct blit_dims {
   int src_x0, src_y0, src_x1, src_y1;
   int dst_x0, dst_y0, dst_x1, dst_y1;
   bool mirror_x, mirror_y;
};
/* Vertex shader source used for the stencil blit. The visible statements
 * map the incoming `position` from the [-1, 1] range to [0, 1] texture
 * coordinates (tex_coords) and pass the position through to gl_Position.
 *
 * NOTE(review): the embedded listing numbers jump 73 -> 76 -> 79, so lines
 * of this literal (the declaration of `position`, the main() wrapper and the
 * terminating ';') are not visible here -- restore them from upstream.
 */
73 static const char *vs_source
=
76 "out vec2 tex_coords;\n"
79 " tex_coords = (position + 1.0) / 2.0;\n"
80 " gl_Position = vec4(position, 0.0, 1.0);\n"
/* Table of fragment shader snippets: a sampler declaration paired with the
 * matching texelFetch() statement. setup_program() indexes it with its
 * msaa_tex flag, so entry 0 is the single-sampled (usampler2D) variant and
 * entry 1 the multisampled (usampler2DMS) variant.
 *
 * NOTE(review): the struct member declarations and the array declarator
 * (listing lines 84-86, 92) are missing from this listing.
 */
83 static const struct sampler_and_fetch
{
87 { "uniform usampler2D texSampler;\n",
88 " out_color = texelFetch(texSampler, txl_coords, 0)" },
89 { "#extension GL_ARB_texture_multisample : enable\n"
90 "uniform usampler2DMS texSampler;\n",
91 " out_color = texelFetch(texSampler, txl_coords, sample_index)" }
95 * Translating Y-tiled to W-tiled:
97 * X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1
98 * Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
/* Fragment shader template. setup_program() formats it with
 * ralloc_asprintf(), passing the sampler declaration from the samplers table.
 *
 * The visible main() sequence is: unnormalize the interpolated coordinates
 * into the Y-tiled drawing rectangle, reinterpret them as W-tiled
 * coordinates, discard anything outside the W-tiled bounding rectangle and
 * finally map the destination coordinate to the source coordinate.
 *
 * NOTE(review): this listing has dropped lines of the literal (braces, the
 * switch case labels, format directives, the terminating ';'); do not treat
 * the text below as the complete shader.
 */
100 static const char *fs_tmpl
=
103 "uniform float src_x_scale;\n"
104 "uniform float src_y_scale;\n"
105 "uniform float src_x_off;\n" /* Offset term of the dst -> src mapping: */
106 "uniform float src_y_off;\n" /* src = dst * src_*_scale + src_*_off. */
107 "uniform float dst_x_off;\n" /* Origin (x0, y0) of the drawing */
108 "uniform float dst_y_off;\n" /* rectangle in Y-tiled space. */
109 "uniform float draw_rect_w;\n" /* This is the unnormalized size of the */
110 "uniform float draw_rect_h;\n" /* drawing rectangle in Y-tiled space. */
111 "uniform int dst_x0;\n" /* This is the bounding rectangle in the W-tiled */
112 "uniform int dst_x1;\n" /* space that will be used to skip pixels lying */
113 "uniform int dst_y0;\n" /* outside. In some cases the Y-tiled rectangle */
114 "uniform int dst_y1;\n" /* is larger. */
115 "uniform int dst_num_samples;\n"
116 "in vec2 tex_coords;\n"
117 "ivec2 txl_coords;\n"
118 "int sample_index;\n"
119 "out uvec4 out_color;\n"
/* Scale the [0, 1] interpolated coordinates to integer texel coordinates
 * inside the drawing rectangle.
 */
121 "void get_unorm_target_coords()\n"
123 " txl_coords.x = int(tex_coords.x * draw_rect_w + dst_x_off);\n"
124 " txl_coords.y = int(tex_coords.y * draw_rect_h + dst_y_off);\n"
/* Apply the linear destination-to-source mapping set up by
 * setup_coord_coeff(); int() truncates toward zero.
 */
127 "void translate_dst_to_src()\n"
129 " txl_coords.x = int(float(txl_coords.x) * src_x_scale + src_x_off);\n"
130 " txl_coords.y = int(float(txl_coords.y) * src_y_scale + src_y_off);\n"
/* Bit swizzle from Y-tiled to W-tiled coordinates:
 *   X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1
 *   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
 * The masks are written as 16-bit constants (0xfff4, 0xfffe).
 */
133 "void translate_y_to_w_tiling()\n"
135 " int X = txl_coords.x;\n"
136 " int Y = txl_coords.y;\n"
137 " txl_coords.x = (X & int(0xfff4)) >> 1;\n"
138 " txl_coords.x |= ((Y & int(0x1)) << 2);\n"
139 " txl_coords.x |= (X & int(0x1));\n"
140 " txl_coords.y = (Y & int(0xfffe)) << 1;\n"
141 " txl_coords.y |= ((X & int(0x8)) >> 2);\n"
142 " txl_coords.y |= ((X & int(0x2)) >> 1);\n"
/* Split interleaved-sample coordinates into a pixel coordinate and a sample
 * index, selected by dst_num_samples.
 * NOTE(review): the case labels are missing from this listing; judging by
 * the bits extracted, the groups below are presumably the 0, 2, 4, 8 and 16
 * sample cases in that order -- confirm against upstream.
 */
145 "void decode_msaa()\n"
147 " int X = txl_coords.x;\n"
148 " int Y = txl_coords.y;\n"
149 " switch (dst_num_samples) {\n"
151 " sample_index = 0;\n"
154 " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
155 " sample_index = (X & 0x2) >> 1;\n"
158 " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
159 " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
160 " sample_index = (Y & 0x2) | ((X & 0x2) >> 1);\n"
163 " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
164 " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
165 " sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
168 " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
169 " txl_coords.y = ((Y & int(0xfff8)) >> 2) | (Y & int(0x1));\n"
170 " sample_index = (((Y & 0x4) << 1) | (X & 0x4) | (Y & 0x2) |\n"
171 " ((X & 0x2) >> 1));\n"
/* Bounds test against the half-open rectangle [dst_x0, dst_x1) x
 * [dst_y0, dst_y1); the statement executed on failure is not visible here.
 */
176 "void discard_outside_bounding_rect()\n"
178 " int X = txl_coords.x;\n"
179 " int Y = txl_coords.y;\n"
180 " if (X >= dst_x1 || X < dst_x0 || Y >= dst_y1 || Y < dst_y0)\n"
186 " get_unorm_target_coords();\n"
187 " translate_y_to_w_tiling();\n"
189 " discard_outside_bounding_rect();\n"
190 " translate_dst_to_src();\n"
195 * Setup uniforms telling the coordinates of the destination rectangle in the
196 * native w-tiled space. These are needed to ignore pixels that lie outside.
197 * The destination is drawn as Y-tiled and in some cases the Y-tiled drawing
198 * rectangle is larger than the original (for example 1x4 w-tiled requires
202 setup_bounding_rect(GLuint prog
, const struct blit_dims
*dims
)
204 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_x0"), dims
->dst_x0
);
205 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_x1"), dims
->dst_x1
);
206 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_y0"), dims
->dst_y0
);
207 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_y1"), dims
->dst_y1
);
211 * Setup uniforms telling the destination width, height and the offset. These
212 * are needed to unnormalize the input coordinates and to correctly translate
213 * between destination and source that may have differing offsets.
216 setup_drawing_rect(GLuint prog
, const struct blit_dims
*dims
)
218 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "draw_rect_w"),
219 dims
->dst_x1
- dims
->dst_x0
);
220 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "draw_rect_h"),
221 dims
->dst_y1
- dims
->dst_y0
);
222 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "dst_x_off"), dims
->dst_x0
);
223 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "dst_y_off"), dims
->dst_y0
);
227 * When not mirroring a coordinate (say, X), we need:
228 * src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
230 * src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
232 * The program uses "round toward zero" to convert the transformed floating
233 * point coordinates to integer coordinates, whereas the behaviour we actually
234 * want is "round to nearest", so 0.5 provides the necessary correction.
236 * When mirroring X we need:
237 * src_x - src_x0 = dst_x1 - dst_x - 0.5
239 * src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
242 setup_coord_coeff(GLuint prog
, GLuint multiplier
, GLuint offset
,
243 int src_0
, int src_1
, int dst_0
, int dst_1
, bool mirror
)
245 const float scale
= ((float)(src_1
- src_0
)) / (dst_1
- dst_0
);
248 _mesa_Uniform1f(multiplier
, -scale
);
249 _mesa_Uniform1f(offset
, src_0
+ (dst_1
- 0.5f
) * scale
);
251 _mesa_Uniform1f(multiplier
, scale
);
252 _mesa_Uniform1f(offset
, src_0
+ (-dst_0
+ 0.5f
) * scale
);
257 * Setup uniforms providing relation between source and destination surfaces.
258 * Destination coordinates are in Y-tiling layout while texelFetch() expects
259 * W-tiled coordinates. Once the destination coordinates are re-interpreted by
260 * the program into the original W-tiled layout, the program needs to know the
261 * offset and scaling factors between the destination and source.
262 * Note that these are calculated in the original W-tiled space before the
263 * destination rectangle is adjusted for possible msaa and Y-tiling.
266 setup_coord_transform(GLuint prog
, const struct blit_dims
*dims
)
268 setup_coord_coeff(prog
,
269 _mesa_GetUniformLocation(prog
, "src_x_scale"),
270 _mesa_GetUniformLocation(prog
, "src_x_off"),
271 dims
->src_x0
, dims
->src_x1
, dims
->dst_x0
, dims
->dst_x1
,
274 setup_coord_coeff(prog
,
275 _mesa_GetUniformLocation(prog
, "src_y_scale"),
276 _mesa_GetUniformLocation(prog
, "src_y_off"),
277 dims
->src_y0
, dims
->src_y1
, dims
->dst_y0
, dims
->dst_y1
,
/* Prepare the GLSL program used for the stencil blit.
 *
 * msaa_tex selects the sampler variant (index into the samplers table) and
 * the cached program slot brw->meta_stencil_blit_programs[msaa_tex].
 *
 * Visible steps: set up the shared meta blit vertex objects, look up the
 * program slot, format the fragment shader source from fs_tmpl with
 * ralloc_asprintf(), compile and link it together with vs_source, and free
 * the temporary fragment source. The caller stores the result in a GLuint
 * program name.
 *
 * NOTE(review): the listing has dropped lines here (return type, braces,
 * the condition guarding _mesa_UseProgram(), trailing call arguments and
 * the return statements). Presumably an already built program is reused
 * and returned early -- confirm against upstream.
 */
282 setup_program(struct brw_context
*brw
, bool msaa_tex
)
284 struct gl_context
*ctx
= &brw
->ctx
;
285 struct blit_state
*blit
= &ctx
->Meta
->Blit
;
287 const struct sampler_and_fetch
*sampler
= &samplers
[msaa_tex
];
289 _mesa_meta_setup_vertex_objects(&brw
->ctx
, &blit
->VAO
, &blit
->buf_obj
, true,
292 GLuint
*prog_id
= &brw
->meta_stencil_blit_programs
[msaa_tex
];
295 _mesa_UseProgram(*prog_id
);
/* Build the fragment source from the template; the string is owned by this
 * function and released below once the program has been compiled.
 */
299 fs_source
= ralloc_asprintf(NULL
, fs_tmpl
, sampler
->sampler
,
301 _mesa_meta_compile_and_link_program(ctx
, vs_source
, fs_source
,
304 ralloc_free(fs_source
);
310 * Samples in stencil buffer are interleaved, and unfortunately the data port
311 * does not support it as render target. Therefore the surface is set up as
312 * single sampled and the program handles the interleaving.
313 * In case of single sampled stencil, the render buffer is adjusted with
314 * twice the base level height in order for the program to be able to write
315 * any mip-level. (Used to set the drawing rectangle for the hw).
/* Scale the destination rectangle in dims so that it covers the interleaved
 * samples when the multisampled stencil surface is treated as single
 * sampled.
 *
 * For sample counts other than 0 and 2 the samples are laid out as
 * x_num_samples by y_num_samples per pixel (y is 4 for 16 samples and
 * above, otherwise 2; x is num_samples / y). The lower bounds are rounded
 * down and the upper bounds aligned up after scaling.
 *
 * NOTE(review): the body of the num_samples == 2 branch and the alignment
 * arguments of ROUND_DOWN_TO()/ALIGN() are missing from this listing.
 */
318 adjust_msaa(struct blit_dims
*dims
, int num_samples
)
320 if (num_samples
== 2) {
323 } else if (num_samples
) {
324 const int y_num_samples
= num_samples
>= 16 ? 4 : 2;
325 const int x_num_samples
= num_samples
/ y_num_samples
;
326 dims
->dst_x0
= ROUND_DOWN_TO(dims
->dst_x0
* x_num_samples
,
328 dims
->dst_y0
= ROUND_DOWN_TO(dims
->dst_y0
* y_num_samples
,
330 dims
->dst_x1
= ALIGN(dims
->dst_x1
* x_num_samples
,
332 dims
->dst_y1
= ALIGN(dims
->dst_y1
* y_num_samples
,
338 * Stencil is mapped as Y-tiled render target and the dimensions need to be
339 * adjusted in order for the Y-tiled rectangle to cover the entire linear
340 * memory space of the original W-tiled rectangle.
343 adjust_tiling(struct blit_dims
*dims
, int num_samples
)
345 const unsigned x_align
= 8, y_align
= num_samples
> 2 ? 8 : 4;
347 dims
->dst_x0
= ROUND_DOWN_TO(dims
->dst_x0
, x_align
) * 2;
348 dims
->dst_y0
= ROUND_DOWN_TO(dims
->dst_y0
, y_align
) / 2;
349 dims
->dst_x1
= ALIGN(dims
->dst_x1
, x_align
) * 2;
350 dims
->dst_y1
= ALIGN(dims
->dst_y1
, y_align
) / 2;
354 * When stencil is mapped as Y-tiled render target the mip-level offsets
355 * calculated for the Y-tiling do not always match the offsets in W-tiling.
356 * Therefore the sampling engine cannot be used for individual mip-level
357 * access but the program needs to do it internally. This can be achieved
358 * by shifting the coordinates of the blit rectangle here.
361 adjust_mip_level(const struct intel_mipmap_tree
*mt
,
362 unsigned level
, unsigned layer
, struct blit_dims
*dims
)
367 intel_miptree_get_image_offset(mt
, level
, layer
, &x_offset
, &y_offset
);
369 dims
->dst_x0
+= x_offset
;
370 dims
->dst_y0
+= y_offset
;
371 dims
->dst_x1
+= x_offset
;
372 dims
->dst_y1
+= y_offset
;
376 prepare_vertex_data(struct gl_context
*ctx
, struct gl_buffer_object
*buf_obj
)
378 static const struct vertex verts
[] = {
379 { .x
= -1.0f
, .y
= -1.0f
},
380 { .x
= 1.0f
, .y
= -1.0f
},
381 { .x
= 1.0f
, .y
= 1.0f
},
382 { .x
= -1.0f
, .y
= 1.0f
} };
384 _mesa_buffer_sub_data(ctx
, buf_obj
, 0, sizeof(verts
), verts
, __func__
);
/* Make the stencil attachment of the current read framebuffer available as
 * a texture for the blit program.
 *
 * Visible steps: take the BUFFER_STENCIL attachment of ctx->ReadBuffer; use
 * the attachment's own texture (target and level taken from the attachment)
 * or bind the renderbuffer as a temporary texture image through
 * _mesa_meta_bind_rb_as_tex_image(); save the texture's BaseLevel, MaxLevel
 * and StencilSampling in blit and set up a sampler object for the target.
 * The caller tests the result with '!' and treats that as failure.
 *
 * NOTE(review): the listing has dropped lines here (return type, the
 * `target` parameter, the `level` declaration, the branch condition and
 * the return statements).
 */
388 set_read_rb_tex_image(struct gl_context
*ctx
, struct fb_tex_blit_state
*blit
,
391 const struct gl_renderbuffer_attachment
*att
=
392 &ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
];
393 struct gl_renderbuffer
*rb
= att
->Renderbuffer
;
394 struct gl_texture_object
*tex_obj
;
397 /* If the renderbuffer is already backed by a tex image, use it. */
399 tex_obj
= att
->Texture
;
400 *target
= tex_obj
->Target
;
401 level
= att
->TextureLevel
;
403 if (!_mesa_meta_bind_rb_as_tex_image(ctx
, rb
, &blit
->tempTex
, &tex_obj
,
/* Remember the texture state that is modified for the blit so that it can
 * be put back afterwards (presumably by _mesa_meta_fb_tex_blit_end()).
 */
409 blit
->baseLevelSave
= tex_obj
->BaseLevel
;
410 blit
->maxLevelSave
= tex_obj
->MaxLevel
;
411 blit
->stencilSamplingSave
= tex_obj
->StencilSampling
;
412 blit
->samp_obj
= _mesa_meta_setup_sampler(ctx
, tex_obj
, *target
,
/* Blit stencil data from the current read framebuffer's stencil attachment
 * into dst_mt by drawing a full rectangle with the stencil blit program.
 *
 * Visible steps:
 *  - copy orig_dims and adjust the copy for msaa and Y-tiling; the adjusted
 *    copy drives the drawing rectangle and viewport, while the unadjusted
 *    orig_dims drive the bounding rectangle and coordinate transform;
 *  - temporarily set ctx->Extensions.ARB_texture_stencil8 (asserted to be
 *    false on entry) and clear it again before returning;
 *  - wrap layer dst_layer of dst_mt, always at level zero, in a
 *    renderbuffer and attach it as GL_COLOR_ATTACHMENT0 of a temporary draw
 *    framebuffer whose status is forced to GL_FRAMEBUFFER_COMPLETE;
 *  - bind the source stencil as a texture, upload the uniforms and draw a
 *    four-vertex triangle fan with depth test disabled;
 *  - delete the temporary renderbuffer and framebuffer.
 *
 * dst_level is not referenced in the visible lines.
 *
 * NOTE(review): the listing has dropped lines here (return type, braces,
 * the declaration of `target`, the failure path after
 * set_read_rb_tex_image() and the last _mesa_TexParameteri() argument).
 * Both callers call _mesa_meta_begin() without a visible _mesa_meta_end();
 * presumably the matching call is on one of the lines missing near the end
 * of this function -- confirm against upstream.
 */
418 brw_meta_stencil_blit(struct brw_context
*brw
,
419 struct intel_mipmap_tree
*dst_mt
,
420 unsigned dst_level
, unsigned dst_layer
,
421 const struct blit_dims
*orig_dims
)
423 struct gl_context
*ctx
= &brw
->ctx
;
424 struct blit_dims dims
= *orig_dims
;
425 struct fb_tex_blit_state blit
;
426 GLuint prog
, fbo
, rbo
;
429 _mesa_meta_fb_tex_blit_begin(ctx
, &blit
);
430 /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
431 * returns a valid format. When we properly support the extension, we
432 * should remove this.
434 assert(ctx
->Extensions
.ARB_texture_stencil8
== false);
435 ctx
->Extensions
.ARB_texture_stencil8
= true;
437 _mesa_GenFramebuffers(1, &fbo
);
438 /* Force the surface to be configured for level zero. */
439 rbo
= brw_get_rb_for_slice(brw
, dst_mt
, 0, dst_layer
, true);
440 adjust_msaa(&dims
, dst_mt
->num_samples
);
441 adjust_tiling(&dims
, dst_mt
->num_samples
);
443 _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER
, fbo
);
444 _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER
, GL_COLOR_ATTACHMENT0
,
445 GL_RENDERBUFFER
, rbo
);
446 _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0
);
447 ctx
->DrawBuffer
->_Status
= GL_FRAMEBUFFER_COMPLETE
;
/* Bind the source stencil as a texture; `target` receives its target. */
449 if (!set_read_rb_tex_image(ctx
, &blit
, &target
)) {
453 _mesa_TexParameteri(target
, GL_DEPTH_STENCIL_TEXTURE_MODE
,
/* Any target other than GL_TEXTURE_2D selects the multisample sampler. */
456 prog
= setup_program(brw
, target
!= GL_TEXTURE_2D
);
/* Bounding rectangle and coordinate transform use the original W-tiled
 * dimensions; the drawing rectangle uses the msaa/tiling adjusted ones.
 */
457 setup_bounding_rect(prog
, orig_dims
);
458 setup_drawing_rect(prog
, &dims
);
459 setup_coord_transform(prog
, orig_dims
);
461 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_num_samples"),
462 dst_mt
->num_samples
);
464 prepare_vertex_data(ctx
, ctx
->Meta
->Blit
.buf_obj
);
/* The viewport matches the adjusted (Y-tiled) drawing rectangle. */
465 _mesa_set_viewport(ctx
, 0, dims
.dst_x0
, dims
.dst_y0
,
466 dims
.dst_x1
- dims
.dst_x0
, dims
.dst_y1
- dims
.dst_y0
);
467 _mesa_ColorMask(GL_TRUE
, GL_TRUE
, GL_TRUE
, GL_TRUE
);
468 _mesa_set_enable(ctx
, GL_DEPTH_TEST
, false);
470 _mesa_DrawArrays(GL_TRIANGLE_FAN
, 0, 4);
/* Undo the temporary extension override before ending the meta blit. */
473 ctx
->Extensions
.ARB_texture_stencil8
= false;
474 _mesa_meta_fb_tex_blit_end(ctx
, target
, &blit
);
477 _mesa_DeleteRenderbuffers(1, &rbo
);
478 _mesa_DeleteFramebuffers(1, &fbo
);
/* Framebuffer-level entry point: blit the stencil attachment of read_fb to
 * the stencil attachment of draw_fb.
 *
 * Visible steps: look up the miptree behind draw_fb's BUFFER_STENCIL
 * renderbuffer (switching to its separate stencil_mt when one exists), let
 * brw_meta_mirror_clip_and_scissor() adjust the rectangles and report the
 * mirroring flags, convert the float coordinates to an integer blit_dims,
 * shift the destination rectangle to the renderbuffer's mip level/layer and
 * run brw_meta_stencil_blit() between two brw_emit_mi_flush() calls inside
 * a MESA_META_ALL meta section.
 *
 * NOTE(review): the listing has dropped lines here (return type, braces,
 * listing lines 495-498 and the statement executed when
 * brw_meta_mirror_clip_and_scissor() returns non-zero -- presumably an
 * early return; confirm against upstream).
 */
482 brw_meta_fbo_stencil_blit(struct brw_context
*brw
,
483 struct gl_framebuffer
*read_fb
,
484 struct gl_framebuffer
*draw_fb
,
485 GLfloat src_x0
, GLfloat src_y0
,
486 GLfloat src_x1
, GLfloat src_y1
,
487 GLfloat dst_x0
, GLfloat dst_y0
,
488 GLfloat dst_x1
, GLfloat dst_y1
)
490 struct gl_context
*ctx
= &brw
->ctx
;
491 struct gl_renderbuffer
*draw_rb
=
492 draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
493 const struct intel_renderbuffer
*dst_irb
= intel_renderbuffer(draw_rb
);
494 struct intel_mipmap_tree
*dst_mt
= dst_irb
->mt
;
/* Use the separate stencil miptree when the destination has one. */
499 if (dst_mt
->stencil_mt
)
500 dst_mt
= dst_mt
->stencil_mt
;
502 bool mirror_x
, mirror_y
;
503 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
504 &src_x0
, &src_y0
, &src_x1
, &src_y1
,
505 &dst_x0
, &dst_y0
, &dst_x1
, &dst_y1
,
506 &mirror_x
, &mirror_y
))
/* The GLfloat coordinates are converted to the int members of blit_dims. */
509 struct blit_dims dims
= { .src_x0
= src_x0
, .src_y0
= src_y0
,
510 .src_x1
= src_x1
, .src_y1
= src_y1
,
511 .dst_x0
= dst_x0
, .dst_y0
= dst_y0
,
512 .dst_x1
= dst_x1
, .dst_y1
= dst_y1
,
513 .mirror_x
= mirror_x
, .mirror_y
= mirror_y
};
514 adjust_mip_level(dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
, &dims
);
516 brw_emit_mi_flush(brw
);
517 _mesa_meta_begin(ctx
, MESA_META_ALL
);
518 brw_meta_stencil_blit(brw
,
519 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
, &dims
);
520 brw_emit_mi_flush(brw
);
/* Copy the stencil contents of miptree src to miptree dst, covering the
 * full logical size of each (logical_width0 x logical_height0), without
 * mirroring.
 *
 * Visible steps: build the blit dimensions, switch dst to its stencil_mt,
 * wrap level 0 / layer 0 of src in a renderbuffer attached as the stencil
 * attachment of a temporary read framebuffer, run brw_meta_stencil_blit()
 * on level 0 / layer 0 of dst inside a MESA_META_ALL meta section bracketed
 * by brw_emit_mi_flush() calls, then delete the temporary objects.
 *
 * NOTE(review): the listing has dropped lines here (return type, braces,
 * the declarations of fbo/rbo and listing lines 535-537, which presumably
 * guard the stencil_mt assignment below -- confirm against upstream).
 */
524 brw_meta_stencil_updownsample(struct brw_context
*brw
,
525 struct intel_mipmap_tree
*src
,
526 struct intel_mipmap_tree
*dst
)
528 struct gl_context
*ctx
= &brw
->ctx
;
529 struct blit_dims dims
= {
530 .src_x0
= 0, .src_y0
= 0,
531 .src_x1
= src
->logical_width0
, .src_y1
= src
->logical_height0
,
532 .dst_x0
= 0, .dst_y0
= 0,
533 .dst_x1
= dst
->logical_width0
, .dst_y1
= dst
->logical_height0
,
534 .mirror_x
= 0, .mirror_y
= 0 };
538 dst
= dst
->stencil_mt
;
540 brw_emit_mi_flush(brw
);
541 _mesa_meta_begin(ctx
, MESA_META_ALL
);
/* Temporary read framebuffer exposing src as the stencil attachment that
 * brw_meta_stencil_blit() samples from.
 */
543 _mesa_GenFramebuffers(1, &fbo
);
544 rbo
= brw_get_rb_for_slice(brw
, src
, 0, 0, false);
546 _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER
, fbo
);
547 _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER
, GL_STENCIL_ATTACHMENT
,
548 GL_RENDERBUFFER
, rbo
);
550 brw_meta_stencil_blit(brw
, dst
, 0, 0, &dims
);
551 brw_emit_mi_flush(brw
);
553 _mesa_DeleteRenderbuffers(1, &rbo
);
554 _mesa_DeleteFramebuffers(1, &fbo
);