2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @file brw_meta_stencil_blit.c
27 * Implements upsampling, downsampling and scaling of stencil miptrees. The
28 * logic originates from brw_blorp_blit.c.
29 * Implementation creates a temporary draw framebuffer object and attaches the
30 * destination stencil buffer attachment as color attachment. Source attachment
31 * is in turn treated as a stencil texture and the glsl program used for the
32 * blitting samples it using stencil-indexing.
34 * Unfortunately as the data port does not support interleaved msaa-surfaces
35 * (stencil is always IMS), the glsl program needs to handle the writing of
36 * individual samples manually. Surface is configured as if it were single
37 * sampled (with adjusted dimensions) and the glsl program extracts the
38 * sample indices from the input coordinates for correct texturing.
40 * Target surface is also configured as Y-tiled instead of W-tiled in order
41 * to support generations 6-7. Later hardware supports W-tiled as render target
42 * and the logic here could be simplified for those.
45 #include "brw_context.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_fbo.h"
49 #include "main/blit.h"
50 #include "main/buffers.h"
51 #include "main/fbobject.h"
52 #include "main/uniforms.h"
53 #include "main/texparam.h"
54 #include "main/texobj.h"
55 #include "main/viewport.h"
56 #include "main/enable.h"
57 #include "main/blend.h"
58 #include "main/varray.h"
59 #include "main/shaderapi.h"
60 #include "main/renderbuffer.h"
61 #include "util/ralloc.h"
63 #include "drivers/common/meta.h"
64 #include "brw_meta_util.h"
66 #define FILE_DEBUG_FLAG DEBUG_FBO
/* Members of struct blit_dims (the `struct blit_dims {` opener and the closing
 * brace are not visible in this view). The struct carries the source
 * rectangle, the destination rectangle and one mirror flag per axis. It is
 * filled in by brw_meta_fbo_stencil_blit() and
 * brw_meta_stencil_updownsample() and read by the setup_*() and adjust_*()
 * helpers.
 */
/* Source rectangle bounds. */
69 int src_x0
, src_y0
, src_x1
, src_y1
;
/* Destination rectangle bounds; the fragment program treats x1/y1 as
 * exclusive (it rejects X >= dst_x1 and Y >= dst_y1).
 */
70 int dst_x0
, dst_y0
, dst_x1
, dst_y1
;
/* Per-axis mirroring flags. */
71 bool mirror_x
, mirror_y
;
/* GLSL vertex shader source (only part of the string is visible here).
 * It forwards the 2D `position` attribute as gl_Position and derives
 * tex_coords by remapping position from [-1, 1] to [0, 1].
 */
74 static const char *vs_source
=
77 "out vec2 tex_coords;\n"
80 " tex_coords = (position + 1.0) / 2.0;\n"
81 " gl_Position = vec4(position, 0.0, 1.0);\n"
/* Table of GLSL snippets, one entry per kind of source texture. Each entry
 * pairs a sampler declaration with the matching texelFetch() statement:
 *   entry 0: single-sampled (usampler2D, fetch with lod 0)
 *   entry 1: multisampled (usampler2DMS, fetch with sample_index)
 * The struct member names and the array name are not visible in this view;
 * presumably this is the `samplers` array that setup_program() indexes with
 * its msaa_tex argument.
 */
84 static const struct sampler_and_fetch
{
88 { "uniform usampler2D texSampler;\n",
89 " out_color = texelFetch(texSampler, txl_coords, 0)" },
90 { "#extension GL_ARB_texture_multisample : enable\n"
91 "uniform usampler2DMS texSampler;\n",
92 " out_color = texelFetch(texSampler, txl_coords, sample_index)" }
96 * Translating Y-tiled to W-tiled:
98 * X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1
99 * Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
/* GLSL fragment shader template. setup_program() passes it to
 * ralloc_asprintf() as the format string, together with the sampler
 * snippets, to build the final shader source.
 *
 * Shader helpers visible in this view:
 *  - get_unorm_target_coords(): scales tex_coords by draw_rect_w and
 *    draw_rect_h, adds dst_x_off and dst_y_off, and converts to int.
 *  - translate_dst_to_src(): applies src_x_scale/src_x_off and
 *    src_y_scale/src_y_off to txl_coords.
 *  - translate_y_to_w_tiling(): re-interprets (X, Y) with the bit masks and
 *    shifts shown in its body.
 *  - decode_msaa(): switches on dst_num_samples and moves the sample bits
 *    out of txl_coords into sample_index (the case labels are not visible).
 *  - discard_outside_bounding_rect(): tests txl_coords against
 *    [dst_x0, dst_x1) x [dst_y0, dst_y1); the statement executed when the
 *    test fails is not visible here, presumably `discard`.
 *
 * The visible part of main() calls get_unorm_target_coords(),
 * translate_y_to_w_tiling(), one step that is not visible (presumably
 * decode_msaa()), discard_outside_bounding_rect() and translate_dst_to_src().
 *
 * NOTE(review): the inline comments below call the src/dst offsets
 * "top right" coordinates, while setup_drawing_rect() uploads dst_x0 and
 * dst_y0 for dst_x_off and dst_y_off. Confirm the intended wording.
 */
101 static const char *fs_tmpl
=
104 "uniform float src_x_scale;\n"
105 "uniform float src_y_scale;\n"
106 "uniform float src_x_off;\n" /* Top right coordinates of the source */
107 "uniform float src_y_off;\n" /* rectangle in W-tiled space. */
108 "uniform float dst_x_off;\n" /* Top right coordinates of the target */
109 "uniform float dst_y_off;\n" /* rectangle in Y-tiled space. */
110 "uniform float draw_rect_w;\n" /* This is the unnormalized size of the */
111 "uniform float draw_rect_h;\n" /* drawing rectangle in Y-tiled space. */
112 "uniform int dst_x0;\n" /* This is the bounding rectangle in the W-tiled */
113 "uniform int dst_x1;\n" /* space that will be used to skip pixels lying */
114 "uniform int dst_y0;\n" /* outside. In some cases the Y-tiled rectangle */
115 "uniform int dst_y1;\n" /* is larger. */
116 "uniform int dst_num_samples;\n"
117 "in vec2 tex_coords;\n"
118 "ivec2 txl_coords;\n"
119 "int sample_index;\n"
120 "out uvec4 out_color;\n"
122 "void get_unorm_target_coords()\n"
124 " txl_coords.x = int(tex_coords.x * draw_rect_w + dst_x_off);\n"
125 " txl_coords.y = int(tex_coords.y * draw_rect_h + dst_y_off);\n"
128 "void translate_dst_to_src()\n"
130 " txl_coords.x = int(float(txl_coords.x) * src_x_scale + src_x_off);\n"
131 " txl_coords.y = int(float(txl_coords.y) * src_y_scale + src_y_off);\n"
134 "void translate_y_to_w_tiling()\n"
136 " int X = txl_coords.x;\n"
137 " int Y = txl_coords.y;\n"
138 " txl_coords.x = (X & int(0xfff4)) >> 1;\n"
139 " txl_coords.x |= ((Y & int(0x1)) << 2);\n"
140 " txl_coords.x |= (X & int(0x1));\n"
141 " txl_coords.y = (Y & int(0xfffe)) << 1;\n"
142 " txl_coords.y |= ((X & int(0x8)) >> 2);\n"
143 " txl_coords.y |= ((X & int(0x2)) >> 1);\n"
146 "void decode_msaa()\n"
148 " int X = txl_coords.x;\n"
149 " int Y = txl_coords.y;\n"
150 " switch (dst_num_samples) {\n"
152 " sample_index = 0;\n"
155 " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
156 " sample_index = (X & 0x2) >> 1;\n"
159 " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
160 " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
161 " sample_index = (Y & 0x2) | ((X & 0x2) >> 1);\n"
164 " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
165 " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
166 " sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
169 " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
170 " txl_coords.y = ((Y & int(0xfff8)) >> 2) | (Y & int(0x1));\n"
171 " sample_index = (((Y & 0x4) << 1) | (X & 0x4) | (Y & 0x2) |\n"
172 " ((X & 0x2) >> 1));\n"
177 "void discard_outside_bounding_rect()\n"
179 " int X = txl_coords.x;\n"
180 " int Y = txl_coords.y;\n"
181 " if (X >= dst_x1 || X < dst_x0 || Y >= dst_y1 || Y < dst_y0)\n"
187 " get_unorm_target_coords();\n"
188 " translate_y_to_w_tiling();\n"
190 " discard_outside_bounding_rect();\n"
191 " translate_dst_to_src();\n"
196 * Setup uniforms telling the coordinates of the destination rectangle in the
197 * native w-tiled space. These are needed to ignore pixels that lie outside.
198 * The destination is drawn as Y-tiled and in some cases the Y-tiled drawing
199 * rectangle is larger than the original (for example 1x4 w-tiled requires 16x2 y-tiled).
/* Uploads the destination rectangle of `dims` as the integer uniforms
 * dst_x0, dst_x1, dst_y0 and dst_y1 of program `prog`. The fragment program
 * uses them to reject pixels outside the rectangle.
 *
 * prog: program whose uniforms are looked up by name.
 * dims: rectangle to upload; brw_meta_stencil_blit() passes the dimensions
 *       it received, not the msaa/tiling adjusted copy.
 */
203 setup_bounding_rect(GLuint prog
, const struct blit_dims
*dims
)
205 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_x0"), dims
->dst_x0
);
206 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_x1"), dims
->dst_x1
);
207 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_y0"), dims
->dst_y0
);
208 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_y1"), dims
->dst_y1
);
212 * Setup uniforms telling the destination width, height and the offset. These
213 * are needed to unnormalize the input coordinates and to correctly translate
214 * between destination and source that may have differing offsets.
/* Uploads the size and origin of the destination rectangle of `dims` as
 * float uniforms of program `prog`:
 *   draw_rect_w = dst_x1 - dst_x0, draw_rect_h = dst_y1 - dst_y0,
 *   dst_x_off = dst_x0, dst_y_off = dst_y0.
 *
 * prog: program whose uniforms are looked up by name.
 * dims: rectangle to upload; brw_meta_stencil_blit() passes its local copy
 *       after adjust_msaa() and adjust_tiling() have been applied.
 */
217 setup_drawing_rect(GLuint prog
, const struct blit_dims
*dims
)
219 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "draw_rect_w"),
220 dims
->dst_x1
- dims
->dst_x0
);
221 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "draw_rect_h"),
222 dims
->dst_y1
- dims
->dst_y0
);
223 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "dst_x_off"), dims
->dst_x0
);
224 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "dst_y_off"), dims
->dst_y0
);
228 * When not mirroring a coordinate (say, X), we need:
229 * src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
231 * src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
233 * The program uses "round toward zero" to convert the transformed floating
234 * point coordinates to integer coordinates, whereas the behaviour we actually
235 * want is "round to nearest", so 0.5 provides the necessary correction.
237 * When mirroring X we need:
238 * src_x - src_x0 = (dst_x1 - dst_x - 0.5) * scale
240 * src_x = src_x0 + (dst_x1 - dst_x - 0.5) * scale
/* Computes the linear mapping from a destination coordinate to a source
 * coordinate along one axis and uploads it as two float uniforms, so that
 * the shader can evaluate src = dst * multiplier + offset.
 *
 * prog:        not referenced in the visible lines.
 * multiplier:  uniform location that receives the scale factor.
 * offset:      uniform location that receives the additive term.
 * src_0/src_1: source bounds along the axis.
 * dst_0/dst_1: destination bounds along the axis.
 * mirror:      presumably selects between the two upload pairs below; the
 *              branch statement itself is not visible in this view.
 *
 * NOTE(review): no guard against dst_1 == dst_0 is visible; in that case
 * scale would be non-finite. Confirm callers never pass an empty range.
 */
243 setup_coord_coeff(GLuint prog
, GLuint multiplier
, GLuint offset
,
244 int src_0
, int src_1
, int dst_0
, int dst_1
, bool mirror
)
/* Source extent divided by destination extent. */
246 const float scale
= ((float)(src_1
- src_0
)) / (dst_1
- dst_0
);
/* First pair: negated scale, offset measured from dst_1, which gives
 * src = src_0 + (dst_1 - dst - 0.5) * scale.
 */
249 _mesa_Uniform1f(multiplier
, -scale
);
250 _mesa_Uniform1f(offset
, src_0
+ (dst_1
- 0.5f
) * scale
);
/* Second pair: positive scale, offset measured from dst_0, which gives
 * src = src_0 + (dst - dst_0 + 0.5) * scale.
 */
252 _mesa_Uniform1f(multiplier
, scale
);
253 _mesa_Uniform1f(offset
, src_0
+ (-dst_0
+ 0.5f
) * scale
);
258 * Setup uniforms providing relation between source and destination surfaces.
259 * Destination coordinates are in Y-tiling layout while texelFetch() expects
260 * W-tiled coordinates. Once the destination coordinates are re-interpreted by
261 * the program into the original W-tiled layout, the program needs to know the
262 * offset and scaling factors between the destination and source.
263 * Note that these are calculated in the original W-tiled space before the
264 * destination rectangle is adjusted for possible msaa and Y-tiling.
/* Uploads the destination-to-source coordinate mapping for both axes by
 * calling setup_coord_coeff() once with the src_x_scale/src_x_off uniforms
 * and the X bounds of `dims`, and once with src_y_scale/src_y_off and the Y
 * bounds.
 *
 * prog: program whose uniforms are looked up by name.
 * dims: rectangles to derive the mapping from; brw_meta_stencil_blit()
 *       passes the dimensions it received, not the adjusted copy.
 *
 * The final argument of each call is not visible in this view; presumably it
 * is the matching mirror flag of `dims`.
 */
267 setup_coord_transform(GLuint prog
, const struct blit_dims
*dims
)
269 setup_coord_coeff(prog
,
270 _mesa_GetUniformLocation(prog
, "src_x_scale"),
271 _mesa_GetUniformLocation(prog
, "src_x_off"),
272 dims
->src_x0
, dims
->src_x1
, dims
->dst_x0
, dims
->dst_x1
,
275 setup_coord_coeff(prog
,
276 _mesa_GetUniformLocation(prog
, "src_y_scale"),
277 _mesa_GetUniformLocation(prog
, "src_y_off"),
278 dims
->src_y0
, dims
->src_y1
, dims
->dst_y0
, dims
->dst_y1
,
/* Prepares the vertex objects and the GLSL program used for the stencil
 * blit.
 *
 * brw:      driver context; the program handle is stored per sampler kind in
 *           brw->meta_stencil_blit_programs[msaa_tex].
 * msaa_tex: selects the multisampled (true) or single-sampled (false)
 *           sampler snippets and program slot.
 *
 * Visible steps: set up the meta vertex objects, make the stored program
 * current, build the fragment source from fs_tmpl with ralloc_asprintf(),
 * compile and link it with vs_source, and free the temporary source string.
 * The conditions between these steps and the return statement are not
 * visible in this view; presumably an existing program is reused and
 * compilation only happens when the slot is empty. The caller uses the
 * return value as the program handle for uniform lookups.
 */
283 setup_program(struct brw_context
*brw
, bool msaa_tex
)
285 struct gl_context
*ctx
= &brw
->ctx
;
286 struct blit_state
*blit
= &ctx
->Meta
->Blit
;
288 const struct sampler_and_fetch
*sampler
= &samplers
[msaa_tex
];
290 _mesa_meta_setup_vertex_objects(&brw
->ctx
, &blit
->VAO
, &blit
->buf_obj
, true,
293 GLuint
*prog_id
= &brw
->meta_stencil_blit_programs
[msaa_tex
];
296 _mesa_UseProgram(*prog_id
);
300 fs_source
= ralloc_asprintf(NULL
, fs_tmpl
, sampler
->sampler
,
302 _mesa_meta_compile_and_link_program(ctx
, vs_source
, fs_source
,
305 ralloc_free(fs_source
);
311 * Samples in stencil buffer are interleaved, and unfortunately the data port
312 * does not support it as render target. Therefore the surface is set up as
313 * single sampled and the program handles the interleaving.
314 * In case of single sampled stencil, the render buffer is adjusted with
315 * twice the base level height in order for the program to be able to write
316 * any mip-level. (Used to set the drawing rectangle for the hw).
/* Rewrites the destination rectangle of `dims` in place for a multisampled
 * destination.
 *
 * dims:        rectangle to adjust (only the dst_* fields are written in the
 *              visible lines).
 * num_samples: sample count of the destination; 0 leaves `dims` untouched.
 *
 * The body of the num_samples == 2 branch is not visible in this view. For
 * other non-zero sample counts the samples are split into a grid of
 * x_num_samples by y_num_samples (y is 4 for 16 samples or more, otherwise
 * 2), the bounds are multiplied by the per-axis count, and then the lower
 * bounds are passed through ROUND_DOWN_TO and the upper bounds through
 * ALIGN. The alignment arguments of those calls are not visible here.
 */
319 adjust_msaa(struct blit_dims
*dims
, int num_samples
)
321 if (num_samples
== 2) {
324 } else if (num_samples
) {
325 const int y_num_samples
= num_samples
>= 16 ? 4 : 2;
326 const int x_num_samples
= num_samples
/ y_num_samples
;
327 dims
->dst_x0
= ROUND_DOWN_TO(dims
->dst_x0
* x_num_samples
,
329 dims
->dst_y0
= ROUND_DOWN_TO(dims
->dst_y0
* y_num_samples
,
331 dims
->dst_x1
= ALIGN(dims
->dst_x1
* x_num_samples
,
333 dims
->dst_y1
= ALIGN(dims
->dst_y1
* y_num_samples
,
339 * Stencil is mapped as Y-tiled render target and the dimensions need to be
340 * adjusted in order for the Y-tiled rectangle to cover the entire linear
341 * memory space of the original W-tiled rectangle.
/* Rewrites the destination rectangle of `dims` in place for rendering to
 * the stencil surface as a Y-tiled target.
 *
 * dims:        rectangle to adjust (only the dst_* fields are written).
 * num_samples: sample count of the destination; selects the Y alignment
 *              (8 when above 2, otherwise 4). The X alignment is always 8.
 *
 * The lower bounds go through ROUND_DOWN_TO and the upper bounds through
 * ALIGN (presumably rounding down and up to a multiple of the alignment).
 * X values are then doubled and Y values halved. For example, an upper
 * bound of (1, 4) with num_samples <= 2 becomes (16, 2).
 */
344 adjust_tiling(struct blit_dims
*dims
, int num_samples
)
346 const unsigned x_align
= 8, y_align
= num_samples
> 2 ? 8 : 4;
348 dims
->dst_x0
= ROUND_DOWN_TO(dims
->dst_x0
, x_align
) * 2;
349 dims
->dst_y0
= ROUND_DOWN_TO(dims
->dst_y0
, y_align
) / 2;
350 dims
->dst_x1
= ALIGN(dims
->dst_x1
, x_align
) * 2;
351 dims
->dst_y1
= ALIGN(dims
->dst_y1
, y_align
) / 2;
355 * When stencil is mapped as Y-tiled render target the mip-level offsets
356 * calculated for the Y-tiling do not always match the offsets in W-tiling.
357 * Therefore the sampling engine cannot be used for individual mip-level
358 * access but the program needs to do it internally. This can be achieved
359 * by shifting the coordinates of the blit rectangle here.
/* Shifts the destination rectangle of `dims` by the image offset of the
 * given miptree slice.
 *
 * mt:    miptree to query.
 * level: mip level of the slice.
 * layer: layer of the slice.
 * dims:  rectangle to shift in place; dst_x0/dst_x1 are moved by x_offset
 *        and dst_y0/dst_y1 by y_offset, so the rectangle size is unchanged.
 *
 * The declarations of x_offset and y_offset are not visible in this view.
 */
362 adjust_mip_level(const struct intel_mipmap_tree
*mt
,
363 unsigned level
, unsigned layer
, struct blit_dims
*dims
)
368 intel_miptree_get_image_offset(mt
, level
, layer
, &x_offset
, &y_offset
);
370 dims
->dst_x0
+= x_offset
;
371 dims
->dst_y0
+= y_offset
;
372 dims
->dst_x1
+= x_offset
;
373 dims
->dst_y1
+= y_offset
;
/* Uploads the four corner vertices (-1,-1), (1,-1), (1,1), (-1,1) to the
 * start of `buf_obj`. brw_meta_stencil_blit() draws them as a four-vertex
 * triangle fan.
 *
 * ctx:     GL context passed through to _mesa_buffer_sub_data().
 * buf_obj: buffer object that receives sizeof(verts) bytes at offset 0.
 */
377 prepare_vertex_data(struct gl_context
*ctx
, struct gl_buffer_object
*buf_obj
)
379 static const struct vertex verts
[] = {
380 { .x
= -1.0f
, .y
= -1.0f
},
381 { .x
= 1.0f
, .y
= -1.0f
},
382 { .x
= 1.0f
, .y
= 1.0f
},
383 { .x
= -1.0f
, .y
= 1.0f
} };
385 _mesa_buffer_sub_data(ctx
, buf_obj
, 0, sizeof(verts
), verts
, __func__
);
/* Makes the stencil attachment of the current read framebuffer available
 * as a texture for the blit program.
 *
 * ctx:    GL context; the attachment is taken from ctx->ReadBuffer.
 * blit:   blit state; receives the saved BaseLevel, MaxLevel and
 *         StencilSampling of the texture object, and the sampler object
 *         returned by _mesa_meta_setup_sampler().
 * target: out parameter for the texture target (the parameter declaration
 *         itself is not visible in this view).
 *
 * Two paths are visible: the attachment's own texture, target and level are
 * used directly, or the renderbuffer is bound as a texture image through
 * _mesa_meta_bind_rb_as_tex_image() using blit->tempTex. The condition that
 * selects between them and the return statements are not visible here; the
 * caller treats the result as a boolean success flag.
 */
389 set_read_rb_tex_image(struct gl_context
*ctx
, struct fb_tex_blit_state
*blit
,
392 const struct gl_renderbuffer_attachment
*att
=
393 &ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
];
394 struct gl_renderbuffer
*rb
= att
->Renderbuffer
;
395 struct gl_texture_object
*tex_obj
;
398 /* If the renderbuffer is already backed by a tex image, use it. */
400 tex_obj
= att
->Texture
;
401 *target
= tex_obj
->Target
;
402 level
= att
->TextureLevel
;
404 if (!_mesa_meta_bind_rb_as_tex_image(ctx
, rb
, &blit
->tempTex
, &tex_obj
,
/* Remember the texture state that is stored in the blit state. */
410 blit
->baseLevelSave
= tex_obj
->BaseLevel
;
411 blit
->maxLevelSave
= tex_obj
->MaxLevel
;
412 blit
->stencilSamplingSave
= tex_obj
->StencilSampling
;
413 blit
->samp_obj
= _mesa_meta_setup_sampler(ctx
, tex_obj
, *target
,
/* Performs the stencil blit into `dst_mt` with a meta draw. The source is
 * the stencil attachment of the current read framebuffer (see
 * set_read_rb_tex_image()).
 *
 * brw:       driver context.
 * dst_mt:    destination stencil miptree.
 * dst_level: not referenced in the visible lines; the renderbuffer is
 *            created for level 0.
 * dst_layer: destination layer passed to brw_get_rb_for_slice().
 * orig_dims: blit rectangles. A local copy is adjusted with adjust_msaa()
 *            and adjust_tiling() and used for the drawing rectangle and the
 *            viewport; the unmodified values feed the bounding rectangle
 *            and the coordinate transform.
 *
 * Visible sequence: begin the meta texture blit, temporarily set
 * ctx->Extensions.ARB_texture_stencil8, create a framebuffer and attach a
 * renderbuffer for the destination as GL_COLOR_ATTACHMENT0, mark the draw
 * framebuffer complete, bind the source as a texture, set
 * GL_DEPTH_STENCIL_TEXTURE_MODE, set up the program and its uniforms, upload
 * the vertices, set viewport, color mask and depth test, and draw a
 * four-vertex triangle fan. Afterwards the extension flag is cleared, the
 * meta blit is ended, and the renderbuffer reference and framebuffer are
 * released.
 *
 * The declarations of fbo, target and prog, and the handling of a failed
 * set_read_rb_tex_image() call, are not visible in this view.
 */
419 brw_meta_stencil_blit(struct brw_context
*brw
,
420 struct intel_mipmap_tree
*dst_mt
,
421 unsigned dst_level
, unsigned dst_layer
,
422 const struct blit_dims
*orig_dims
)
424 struct gl_context
*ctx
= &brw
->ctx
;
425 struct blit_dims dims
= *orig_dims
;
426 struct fb_tex_blit_state blit
;
428 struct gl_renderbuffer
*rb
;
431 _mesa_meta_fb_tex_blit_begin(ctx
, &blit
);
432 /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
433 * returns a valid format. When we properly support the extension, we
434 * should remove this.
436 assert(ctx
->Extensions
.ARB_texture_stencil8
== false);
437 ctx
->Extensions
.ARB_texture_stencil8
= true;
439 _mesa_GenFramebuffers(1, &fbo
);
440 /* Force the surface to be configured for level zero. */
441 rb
= brw_get_rb_for_slice(brw
, dst_mt
, 0, dst_layer
, true);
442 adjust_msaa(&dims
, dst_mt
->num_samples
);
443 adjust_tiling(&dims
, dst_mt
->num_samples
);
445 _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER
, fbo
);
446 _mesa_framebuffer_renderbuffer(ctx
, ctx
->DrawBuffer
, GL_COLOR_ATTACHMENT0
,
448 _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0
);
449 ctx
->DrawBuffer
->_Status
= GL_FRAMEBUFFER_COMPLETE
;
451 if (!set_read_rb_tex_image(ctx
, &blit
, &target
)) {
455 _mesa_TexParameteri(target
, GL_DEPTH_STENCIL_TEXTURE_MODE
,
458 prog
= setup_program(brw
, target
!= GL_TEXTURE_2D
);
459 setup_bounding_rect(prog
, orig_dims
);
460 setup_drawing_rect(prog
, &dims
);
461 setup_coord_transform(prog
, orig_dims
);
463 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_num_samples"),
464 dst_mt
->num_samples
);
466 prepare_vertex_data(ctx
, ctx
->Meta
->Blit
.buf_obj
);
467 _mesa_set_viewport(ctx
, 0, dims
.dst_x0
, dims
.dst_y0
,
468 dims
.dst_x1
- dims
.dst_x0
, dims
.dst_y1
- dims
.dst_y0
);
469 _mesa_ColorMask(GL_TRUE
, GL_TRUE
, GL_TRUE
, GL_TRUE
);
470 _mesa_set_enable(ctx
, GL_DEPTH_TEST
, false);
472 _mesa_DrawArrays(GL_TRIANGLE_FAN
, 0, 4);
475 ctx
->Extensions
.ARB_texture_stencil8
= false;
476 _mesa_meta_fb_tex_blit_end(ctx
, target
, &blit
);
479 _mesa_reference_renderbuffer(&rb
, NULL
);
480 _mesa_DeleteFramebuffers(1, &fbo
);
/* Blits the stencil buffer between two framebuffers using the meta stencil
 * blit path.
 *
 * brw:     driver context.
 * read_fb: source framebuffer, passed to the clip/scissor helper.
 * draw_fb: destination framebuffer; its stencil attachment supplies the
 *          destination miptree, level and layer.
 * src_x0, src_y0, src_x1, src_y1: source rectangle.
 * dst_x0, dst_y0, dst_x1, dst_y1: destination rectangle.
 *
 * The rectangle is first passed through brw_meta_mirror_clip_and_scissor(),
 * which may modify the coordinates and reports the mirror flags. What
 * happens when that call returns true is not visible in this view;
 * presumably the function returns without drawing. The resulting GLfloat
 * coordinates are then converted to the int fields of struct blit_dims, the
 * destination rectangle is shifted to the attachment's mip level and layer,
 * and the blit is issued between two brw_emit_mi_flush() calls. The
 * counterpart of _mesa_meta_begin() is not visible here.
 */
484 brw_meta_fbo_stencil_blit(struct brw_context
*brw
,
485 struct gl_framebuffer
*read_fb
,
486 struct gl_framebuffer
*draw_fb
,
487 GLfloat src_x0
, GLfloat src_y0
,
488 GLfloat src_x1
, GLfloat src_y1
,
489 GLfloat dst_x0
, GLfloat dst_y0
,
490 GLfloat dst_x1
, GLfloat dst_y1
)
492 struct gl_context
*ctx
= &brw
->ctx
;
493 struct gl_renderbuffer
*draw_rb
=
494 draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
495 const struct intel_renderbuffer
*dst_irb
= intel_renderbuffer(draw_rb
);
496 struct intel_mipmap_tree
*dst_mt
= dst_irb
->mt
;
/* Use the separate stencil miptree when the attachment has one. */
501 if (dst_mt
->stencil_mt
)
502 dst_mt
= dst_mt
->stencil_mt
;
504 bool mirror_x
, mirror_y
;
505 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
506 &src_x0
, &src_y0
, &src_x1
, &src_y1
,
507 &dst_x0
, &dst_y0
, &dst_x1
, &dst_y1
,
508 &mirror_x
, &mirror_y
))
511 struct blit_dims dims
= { .src_x0
= src_x0
, .src_y0
= src_y0
,
512 .src_x1
= src_x1
, .src_y1
= src_y1
,
513 .dst_x0
= dst_x0
, .dst_y0
= dst_y0
,
514 .dst_x1
= dst_x1
, .dst_y1
= dst_y1
,
515 .mirror_x
= mirror_x
, .mirror_y
= mirror_y
};
516 adjust_mip_level(dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
, &dims
);
518 brw_emit_mi_flush(brw
);
519 _mesa_meta_begin(ctx
, MESA_META_ALL
);
520 brw_meta_stencil_blit(brw
,
521 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
, &dims
);
522 brw_emit_mi_flush(brw
);
/* Copies the whole stencil surface of `src` to `dst` with the meta stencil
 * blit path.
 *
 * brw: driver context.
 * src: source miptree; slice (level 0, layer 0) is wrapped in a
 *      renderbuffer and attached as the stencil attachment of a temporary
 *      read framebuffer.
 * dst: destination miptree; replaced by dst->stencil_mt in the visible code
 *      (the condition guarding that assignment is not visible in this view).
 *
 * The blit covers (0, 0) to (logical_width0, logical_height0) on both
 * surfaces, without mirroring, so differing sizes result in scaling. The
 * draw is bracketed by brw_emit_mi_flush() calls, and the temporary
 * renderbuffer reference and framebuffer are released at the end. The
 * declaration of fbo, the last argument of the attach call and the
 * counterpart of _mesa_meta_begin() are not visible here.
 */
526 brw_meta_stencil_updownsample(struct brw_context
*brw
,
527 struct intel_mipmap_tree
*src
,
528 struct intel_mipmap_tree
*dst
)
530 struct gl_context
*ctx
= &brw
->ctx
;
531 struct blit_dims dims
= {
532 .src_x0
= 0, .src_y0
= 0,
533 .src_x1
= src
->logical_width0
, .src_y1
= src
->logical_height0
,
534 .dst_x0
= 0, .dst_y0
= 0,
535 .dst_x1
= dst
->logical_width0
, .dst_y1
= dst
->logical_height0
,
536 .mirror_x
= 0, .mirror_y
= 0 };
538 struct gl_renderbuffer
*rb
;
541 dst
= dst
->stencil_mt
;
543 brw_emit_mi_flush(brw
);
544 _mesa_meta_begin(ctx
, MESA_META_ALL
);
546 _mesa_GenFramebuffers(1, &fbo
);
547 rb
= brw_get_rb_for_slice(brw
, src
, 0, 0, false);
549 _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER
, fbo
);
550 _mesa_framebuffer_renderbuffer(ctx
, ctx
->ReadBuffer
, GL_STENCIL_ATTACHMENT
,
553 brw_meta_stencil_blit(brw
, dst
, 0, 0, &dims
);
554 brw_emit_mi_flush(brw
);
556 _mesa_reference_renderbuffer(&rb
, NULL
);
557 _mesa_DeleteFramebuffers(1, &fbo
);