2 * Copyright © 2014 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * @file brw_meta_stencil_blit.c
27 * Implements upsampling, downsampling and scaling of stencil miptrees. The
28 * original logic can be found in brw_blorp_blit.c.
29 * Implementation creates a temporary draw framebuffer object and attaches the
30 * destination stencil buffer attachment as color attachment. Source attachment
31 * is in turn treated as a stencil texture and the glsl program used for the
32 * blitting samples it using stencil-indexing.
34 * Unfortunately as the data port does not support interleaved msaa-surfaces
35 * (stencil is always IMS), the glsl program needs to handle the writing of
36 * individual samples manually. Surface is configured as if it were single
37 * sampled (with adjusted dimensions) and the glsl program extracts the
38 * sample indices from the input coordinates for correct texturing.
40 * Target surface is also configured as Y-tiled instead of W-tiled in order
41 * to support generations 6-7. Later hardware supports W-tiled as render target
42 * and the logic here could be simplified for those.
45 #include "brw_context.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_fbo.h"
49 #include "main/blit.h"
50 #include "main/buffers.h"
51 #include "main/fbobject.h"
52 #include "main/uniforms.h"
53 #include "main/texparam.h"
54 #include "main/texobj.h"
55 #include "main/viewport.h"
56 #include "main/enable.h"
57 #include "main/blend.h"
58 #include "main/varray.h"
59 #include "main/shaderapi.h"
60 #include "util/ralloc.h"
62 #include "drivers/common/meta.h"
63 #include "brw_meta_util.h"
65 #define FILE_DEBUG_FLAG DEBUG_FBO
/*
 * Members describing one blit: the helpers in this file read them through
 * `struct blit_dims *` (for example dims->dst_x0, dims->mirror_x).
 * NOTE(review): the `struct ... {` opener and the closing brace are not
 * present in this listing -- confirm against the pristine source.
 */
/* Source rectangle.  Callers in this file fill x1/y1 with a surface width
 * and height while x0/y0 are 0, so x1/y1 act as exclusive upper bounds. */
68 int src_x0
, src_y0
, src_x1
, src_y1
;
/* Destination rectangle.  The fragment shader discards X >= dst_x1 and
 * Y >= dst_y1, so the upper bounds are exclusive here as well. */
69 int dst_x0
, dst_y0
, dst_x1
, dst_y1
;
/* Per-axis mirroring flags; populated from
 * brw_meta_mirror_clip_and_scissor() or set to 0 for whole-surface blits. */
70 bool mirror_x
, mirror_y
;
/*
 * GLSL vertex shader source used for the stencil blit.  The visible lines
 * compute tex_coords as (position + 1.0) / 2.0 and emit the 2D position
 * unchanged with z = 0 and w = 1.
 * NOTE(review): the #version line, the `position` declaration, the main()
 * wrapper and the terminating `;` are not present in this listing.
 */
73 static const char *vs_source
=
76 "out vec2 tex_coords;\n"
79 " tex_coords = (position + 1.0) / 2.0;\n"
80 " gl_Position = vec4(position, 0.0, 1.0);\n"
/*
 * Pairs of GLSL snippets: a sampler declaration and the matching texelFetch
 * statement.  setup_program() indexes this table with its `msaa_tex` flag
 * (`&samplers[msaa_tex]`), so entry 0 is the single-sampled variant and
 * entry 1 the multisampled one.
 * NOTE(review): the member declarations and the `samplers[]` declarator are
 * not present in this listing.
 */
83 static const struct sampler_and_fetch
{
/* Entry 0: usampler2D, fetched at LOD 0. */
87 { "uniform usampler2D texSampler;\n",
88 " out_color = texelFetch(texSampler, txl_coords, 0)" },
/* Entry 1: usampler2DMS, fetched at the shader's sample_index. */
89 { "#extension GL_ARB_texture_multisample : enable\n"
90 "uniform usampler2DMS texSampler;\n",
91 " out_color = texelFetch(texSampler, txl_coords, sample_index)" }
95 * Translating Y-tiled to W-tiled:
97 * X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1
98 * Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
/*
 * Fragment shader template.  setup_program() passes it to ralloc_asprintf()
 * as the format string together with snippets from the samplers table.
 * NOTE(review): the format placeholders, the #version line, the GLSL braces,
 * the case labels of the switch and the body of the bounds test are not
 * present in this listing; the comments below describe visible lines only.
 */
100 static const char *fs_tmpl
=
103 "uniform float src_x_scale;\n"
104 "uniform float src_y_scale;\n"
105 "uniform float src_x_off;\n" /* Top right coordinates of the source */
106 "uniform float src_y_off;\n" /* rectangle in W-tiled space. */
107 "uniform float dst_x_off;\n" /* Top right coordinates of the target */
108 "uniform float dst_y_off;\n" /* rectangle in Y-tiled space. */
109 "uniform float draw_rect_w;\n" /* This is the unnormalized size of the */
110 "uniform float draw_rect_h;\n" /* drawing rectangle in Y-tiled space. */
111 "uniform int dst_x0;\n" /* This is the bounding rectangle in the W-tiled */
112 "uniform int dst_x1;\n" /* space that will be used to skip pixels lying */
113 "uniform int dst_y0;\n" /* outside. In some cases the Y-tiled rectangle */
114 "uniform int dst_y1;\n" /* is larger. */
115 "uniform int dst_num_samples;\n"
116 "in vec2 tex_coords;\n"
/* Shader-global scratch state shared by the helper functions below. */
117 "ivec2 txl_coords;\n"
118 "int sample_index;\n"
119 "out uvec4 out_color;\n"
/* Scale the interpolated tex_coords by the drawing rectangle size and add
 * the destination offset to obtain integer texel coordinates. */
121 "void get_unorm_target_coords()\n"
123 " txl_coords.x = int(tex_coords.x * draw_rect_w + dst_x_off);\n"
124 " txl_coords.y = int(tex_coords.y * draw_rect_h + dst_y_off);\n"
/* Apply the per-axis scale/offset uniforms; int() truncates the result. */
127 "void translate_dst_to_src()\n"
129 " txl_coords.x = int(float(txl_coords.x) * src_x_scale + src_x_off);\n"
130 " txl_coords.y = int(float(txl_coords.y) * src_y_scale + src_y_off);\n"
/* Bit shuffle of X/Y: X bit 0 is kept, X bits above bit 3 (and bit 2) move
 * down by one, Y bit 0 becomes X bit 2; Y bits above bit 0 move up by one,
 * X bit 3 becomes Y bit 1 and X bit 1 becomes Y bit 0. */
133 "void translate_y_to_w_tiling()\n"
135 " int X = txl_coords.x;\n"
136 " int Y = txl_coords.y;\n"
137 " txl_coords.x = (X & int(0xfff4)) >> 1;\n"
138 " txl_coords.x |= ((Y & int(0x1)) << 2);\n"
139 " txl_coords.x |= (X & int(0x1));\n"
140 " txl_coords.y = (Y & int(0xfffe)) << 1;\n"
141 " txl_coords.y |= ((X & int(0x8)) >> 2);\n"
142 " txl_coords.y |= ((X & int(0x2)) >> 1);\n"
/* Split interleaved coordinates into a texel position and a sample index,
 * selected by dst_num_samples.  The arms below, in order: no change with
 * sample 0; one sample bit taken from X; one bit each from Y and X; two bits
 * from X and one from Y.  NOTE(review): which sample count maps to which arm
 * is not visible here. */
145 "void decode_msaa()\n"
147 " int X = txl_coords.x;\n"
148 " int Y = txl_coords.y;\n"
149 " switch (dst_num_samples) {\n"
151 " sample_index = 0;\n"
154 " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
155 " sample_index = (X & 0x2) >> 1;\n"
158 " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
159 " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
160 " sample_index = (Y & 0x2) | ((X & 0x2) >> 1);\n"
163 " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
164 " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
165 " sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
/* Test txl_coords against the half-open rectangle [dst_x0, dst_x1) x
 * [dst_y0, dst_y1).  The statement run on failure is not visible
 * (presumably `discard` -- confirm). */
169 "void discard_outside_bounding_rect()\n"
171 " int X = txl_coords.x;\n"
172 " int Y = txl_coords.y;\n"
173 " if (X >= dst_x1 || X < dst_x0 || Y >= dst_y1 || Y < dst_y0)\n"
/* Visible call sequence of the entry point.  NOTE(review): one line between
 * the tiling translation and the bounds test is missing from this listing. */
179 " get_unorm_target_coords();\n"
180 " translate_y_to_w_tiling();\n"
182 " discard_outside_bounding_rect();\n"
183 " translate_dst_to_src();\n"
188 * Setup uniforms telling the coordinates of the destination rectangle in the
189 * native w-tiled space. These are needed to ignore pixels that lie outside.
190 * The destination is drawn as Y-tiled and in some cases the Y-tiled drawing
191 * rectangle is larger than the original (for example 1x4 w-tiled requires 16x2 y-tiled).
/*
 * Uploads the four destination bounds from `dims` as the integer uniforms
 * "dst_x0", "dst_x1", "dst_y0" and "dst_y1" of program `prog`.
 * Each location is looked up by name on every call.
 * NOTE(review): return type and braces are not present in this listing.
 */
195 setup_bounding_rect(GLuint prog
, const struct blit_dims
*dims
)
197 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_x0"), dims
->dst_x0
);
198 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_x1"), dims
->dst_x1
);
199 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_y0"), dims
->dst_y0
);
200 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_y1"), dims
->dst_y1
);
204 * Setup uniforms telling the destination width, height and the offset. These
205 * are needed to unnormalize the input coordinates and to correctly translate
206 * between destination and source that may have differing offsets.
/*
 * Uploads the destination rectangle of `dims` as float uniforms of `prog`:
 * "draw_rect_w"/"draw_rect_h" receive the width (dst_x1 - dst_x0) and
 * height (dst_y1 - dst_y0), "dst_x_off"/"dst_y_off" receive dst_x0/dst_y0.
 * The integer values are converted to float by the _mesa_Uniform1f() calls.
 * NOTE(review): return type and braces are not present in this listing.
 */
209 setup_drawing_rect(GLuint prog
, const struct blit_dims
*dims
)
211 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "draw_rect_w"),
212 dims
->dst_x1
- dims
->dst_x0
);
213 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "draw_rect_h"),
214 dims
->dst_y1
- dims
->dst_y0
);
215 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "dst_x_off"), dims
->dst_x0
);
216 _mesa_Uniform1f(_mesa_GetUniformLocation(prog
, "dst_y_off"), dims
->dst_y0
);
220 * When not mirroring a coordinate (say, X), we need:
221 * src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
223 * src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
225 * The program uses "round toward zero" to convert the transformed floating
226 * point coordinates to integer coordinates, whereas the behaviour we actually
227 * want is "round to nearest", so 0.5 provides the necessary correction.
229 * When mirroring X we need:
230 * src_x - src_x0 = (dst_x1 - dst_x - 0.5) * scale
232 * src_x = src_x0 + (dst_x1 - dst_x - 0.5) * scale
/*
 * Writes the scale/offset pair for one axis into the float uniforms at
 * locations `multiplier` and `offset`.
 *
 *   scale = (src_1 - src_0) / (dst_1 - dst_0), evaluated in float.
 *
 * Two upload pairs are visible: (-scale, src_0 + (dst_1 - 0.5) * scale) and
 * (scale, src_0 + (-dst_0 + 0.5) * scale).
 * NOTE(review): the conditional choosing between them is not present in
 * this listing; presumably it tests `mirror` -- confirm.  `prog` is not
 * used by any visible line.  A destination with dst_1 == dst_0 makes the
 * divisor zero, so callers are presumably expected to pass a non-empty
 * range -- confirm.
 */
235 setup_coord_coeff(GLuint prog
, GLuint multiplier
, GLuint offset
,
236 int src_0
, int src_1
, int dst_0
, int dst_1
, bool mirror
)
238 const float scale
= ((float)(src_1
- src_0
)) / (dst_1
- dst_0
);
/* First pair: negated scale, offset anchored at the far destination edge. */
241 _mesa_Uniform1f(multiplier
, -scale
);
242 _mesa_Uniform1f(offset
, src_0
+ (dst_1
- 0.5) * scale
);
/* Second pair: positive scale, offset anchored at the near destination edge. */
244 _mesa_Uniform1f(multiplier
, scale
);
245 _mesa_Uniform1f(offset
, src_0
+ (-dst_0
+ 0.5) * scale
);
250 * Setup uniforms providing relation between source and destination surfaces.
251 * Destination coordinates are in Y-tiling layout while texelFetch() expects
252 * W-tiled coordinates. Once the destination coordinates are re-interpreted by
253 * the program into the original W-tiled layout, the program needs to know the
254 * offset and scaling factors between the destination and source.
255 * Note that these are calculated in the original W-tiled space before the
256 * destination rectangle is adjusted for possible msaa and Y-tiling.
/*
 * Calls setup_coord_coeff() once per axis: the X call targets the uniforms
 * "src_x_scale"/"src_x_off" with the X bounds of `dims`, the Y call targets
 * "src_y_scale"/"src_y_off" with the Y bounds.
 * NOTE(review): the last argument of each call (the mirror flag) and the
 * closing of each call are not present in this listing.
 */
259 setup_coord_transform(GLuint prog
, const struct blit_dims
*dims
)
261 setup_coord_coeff(prog
,
262 _mesa_GetUniformLocation(prog
, "src_x_scale"),
263 _mesa_GetUniformLocation(prog
, "src_x_off"),
264 dims
->src_x0
, dims
->src_x1
, dims
->dst_x0
, dims
->dst_x1
,
267 setup_coord_coeff(prog
,
268 _mesa_GetUniformLocation(prog
, "src_y_scale"),
269 _mesa_GetUniformLocation(prog
, "src_y_off"),
270 dims
->src_y0
, dims
->src_y1
, dims
->dst_y0
, dims
->dst_y1
,
/*
 * Prepares the GLSL program for the stencil blit.  `msaa_tex` selects both
 * the samplers table entry and the slot in brw->meta_stencil_blit_programs.
 *
 * Visible steps: set up the meta blit VAO/VBO, make *prog_id current, format
 * the fragment shader from fs_tmpl with ralloc_asprintf(), compile and link
 * it together with vs_source, then free the formatted string.
 * NOTE(review): the return type, the check that decides whether the cached
 * program is reused, the declaration of fs_source, the remaining call
 * arguments and the return statement are not present in this listing.
 * Callers assign the result to a GLuint.
 */
275 setup_program(struct brw_context
*brw
, bool msaa_tex
)
277 struct gl_context
*ctx
= &brw
->ctx
;
278 struct blit_state
*blit
= &ctx
->Meta
->Blit
;
280 const struct sampler_and_fetch
*sampler
= &samplers
[msaa_tex
];
282 _mesa_meta_setup_vertex_objects(&blit
->VAO
, &blit
->VBO
, true, 2, 2, 0);
/* One program slot per sampler variant. */
284 GLuint
*prog_id
= &brw
->meta_stencil_blit_programs
[msaa_tex
];
287 _mesa_UseProgram(*prog_id
);
/* A NULL first argument is passed as the ralloc context, and the string is
 * released explicitly with ralloc_free() below. */
291 fs_source
= ralloc_asprintf(NULL
, fs_tmpl
, sampler
->sampler
,
293 _mesa_meta_compile_and_link_program(ctx
, vs_source
, fs_source
,
296 ralloc_free(fs_source
);
302 * Samples in stencil buffer are interleaved, and unfortunately the data port
303 * does not support it as render target. Therefore the surface is set up as
304 * single sampled and the program handles the interleaving.
305 * In case of single sampled stencil, the render buffer is adjusted with
306 * twice the base level height in order for the program to be able to write
307 * any mip-level. (Used to set the drawing rectangle for the hw).
/*
 * Rewrites the destination rectangle in `dims` for an interleaved
 * multisample layout, in place.
 *
 * For a non-zero sample count other than 2, X is multiplied by
 * num_samples / 2 and Y by 2; the lower bounds are then rounded down and
 * the upper bounds aligned up (X to num_samples, Y to 4).
 * NOTE(review): the body of the `num_samples == 2` branch, the return type
 * and the closing braces are not present in this listing.  No visible line
 * modifies `dims` when num_samples is 0.
 */
310 adjust_msaa(struct blit_dims
*dims
, int num_samples
)
312 if (num_samples
== 2) {
315 } else if (num_samples
) {
/* Horizontal growth factor of the interleaved layout. */
316 const int x_num_samples
= num_samples
/ 2;
317 dims
->dst_x0
= ROUND_DOWN_TO(dims
->dst_x0
* x_num_samples
, num_samples
);
318 dims
->dst_y0
= ROUND_DOWN_TO(dims
->dst_y0
* 2, 4);
319 dims
->dst_x1
= ALIGN(dims
->dst_x1
* x_num_samples
, num_samples
);
320 dims
->dst_y1
= ALIGN(dims
->dst_y1
* 2, 4);
325 * Stencil is mapped as Y-tiled render target and the dimensions need to be
326 * adjusted in order for the Y-tiled rectangle to cover the entire linear
327 * memory space of the original W-tiled rectangle.
/*
 * Converts the destination rectangle in `dims`, in place, to the rectangle
 * that is drawn when the surface is treated as Y-tiled: X bounds are
 * aligned to 8 and doubled, Y bounds are aligned to 4 (8 when num_samples
 * is greater than 2) and halved.  Lower bounds round down, upper bounds
 * round up, so the result never shrinks the covered area.
 * NOTE(review): return type and braces are not present in this listing.
 */
330 adjust_tiling(struct blit_dims
*dims
, int num_samples
)
332 const unsigned x_align
= 8, y_align
= num_samples
> 2 ? 8 : 4;
334 dims
->dst_x0
= ROUND_DOWN_TO(dims
->dst_x0
, x_align
) * 2;
335 dims
->dst_y0
= ROUND_DOWN_TO(dims
->dst_y0
, y_align
) / 2;
336 dims
->dst_x1
= ALIGN(dims
->dst_x1
, x_align
) * 2;
337 dims
->dst_y1
= ALIGN(dims
->dst_y1
, y_align
) / 2;
341 * When stencil is mapped as Y-tiled render target the mip-level offsets
342 * calculated for the Y-tiling do not always match the offsets in W-tiling.
343 * Therefore the sampling engine cannot be used for individual mip-level
344 * access but the program needs to do it internally. This can be achieved
345 * by shifting the coordinates of the blit rectangle here.
/*
 * Shifts the destination rectangle in `dims` by the image offset that
 * intel_miptree_get_image_offset() reports for (`level`, `layer`) of `mt`.
 * Both corners move by the same amount, so the rectangle size is unchanged.
 * NOTE(review): the return type, braces and the declarations of
 * x_offset/y_offset are not present in this listing.
 */
348 adjust_mip_level(const struct intel_mipmap_tree
*mt
,
349 unsigned level
, unsigned layer
, struct blit_dims
*dims
)
354 intel_miptree_get_image_offset(mt
, level
, layer
, &x_offset
, &y_offset
);
356 dims
->dst_x0
+= x_offset
;
357 dims
->dst_y0
+= y_offset
;
358 dims
->dst_x1
+= x_offset
;
359 dims
->dst_y1
+= y_offset
;
/*
 * Uploads four vertices spanning (-1,-1)..(1,1) to the start of the buffer
 * currently bound to GL_ARRAY_BUFFER_ARB.  The corners are listed in the
 * order (-1,-1), (1,-1), (1,1), (-1,1); brw_meta_stencil_blit() draws them
 * as a 4-vertex triangle fan.
 * NOTE(review): return type and braces are not present in this listing.
 */
363 prepare_vertex_data(void)
365 static const struct vertex verts
[] = {
366 { .x
= -1.0f
, .y
= -1.0f
},
367 { .x
= 1.0f
, .y
= -1.0f
},
368 { .x
= 1.0f
, .y
= 1.0f
},
369 { .x
= -1.0f
, .y
= 1.0f
} };
371 _mesa_BufferSubData(GL_ARRAY_BUFFER_ARB
, 0, sizeof(verts
), verts
);
/*
 * Makes the stencil attachment of ctx->ReadBuffer available as a texture
 * and records state in `blit` for later restoration.
 *
 * Visible behaviour: when the attachment is texture backed, its texture
 * object, target and level are used directly; otherwise
 * _mesa_meta_bind_rb_as_tex_image() is asked to bind the renderbuffer via
 * blit->tempTex.  The texture's BaseLevel, MaxLevel and StencilSampling are
 * then saved in `blit`, and a sampler is created with
 * _mesa_meta_setup_sampler().  The chosen target is written to *target.
 * NOTE(review): the return type, the remaining parameter(s), the branch
 * conditions, the failure handling and the return statements are not
 * present in this listing.  The caller tests the result with `!`.
 */
375 set_read_rb_tex_image(struct gl_context
*ctx
, struct fb_tex_blit_state
*blit
,
378 const struct gl_renderbuffer_attachment
*att
=
379 &ctx
->ReadBuffer
->Attachment
[BUFFER_STENCIL
];
380 struct gl_renderbuffer
*rb
= att
->Renderbuffer
;
381 struct gl_texture_object
*tex_obj
;
384 /* If the renderbuffer is already backed by a tex image, use it. */
386 tex_obj
= att
->Texture
;
387 *target
= tex_obj
->Target
;
388 level
= att
->TextureLevel
;
390 if (!_mesa_meta_bind_rb_as_tex_image(ctx
, rb
, &blit
->tempTex
, &tex_obj
,
/* Remember the texture parameters in `blit` before they are changed. */
396 blit
->baseLevelSave
= tex_obj
->BaseLevel
;
397 blit
->maxLevelSave
= tex_obj
->MaxLevel
;
398 blit
->stencilSamplingSave
= tex_obj
->StencilSampling
;
399 blit
->sampler
= _mesa_meta_setup_sampler(ctx
, tex_obj
, *target
,
/*
 * Draws the stencil blit described by `orig_dims` into `dst_mt`.
 *
 * Visible sequence:
 *   1. Begin the meta fb-tex blit and temporarily set
 *      ctx->Extensions.ARB_texture_stencil8 (asserted to be false first).
 *   2. Create a framebuffer, obtain a renderbuffer for level 0 / `dst_layer`
 *      of `dst_mt`, and attach it as GL_COLOR_ATTACHMENT0 of the draw
 *      framebuffer, forcing its status to GL_FRAMEBUFFER_COMPLETE.
 *   3. On a local copy of the dims, apply adjust_msaa() and adjust_tiling();
 *      this copy drives the drawing rectangle and viewport, while the
 *      unmodified `orig_dims` drive the bounding rectangle and the
 *      source/destination coordinate transform.
 *   4. Bind the read stencil buffer as a texture, pick the program by
 *      whether the target differs from GL_TEXTURE_2D, upload uniforms,
 *      enable all color channels, disable depth test, and draw a 4-vertex
 *      triangle fan.
 *   5. Clear the extension flag again, end the meta blit, and delete the
 *      renderbuffer and framebuffer.
 *
 * NOTE(review): the return type, braces, the declaration of `target`, the
 * body of the failure branch after set_read_rb_tex_image(), the value
 * passed for GL_DEPTH_STENCIL_TEXTURE_MODE, any label before the cleanup
 * lines and the terminator of the XXX comment are not present in this
 * listing.  `dst_level` is not read by any visible line.
 */
405 brw_meta_stencil_blit(struct brw_context
*brw
,
406 struct intel_mipmap_tree
*dst_mt
,
407 unsigned dst_level
, unsigned dst_layer
,
408 const struct blit_dims
*orig_dims
)
410 struct gl_context
*ctx
= &brw
->ctx
;
411 struct blit_dims dims
= *orig_dims
;
412 struct fb_tex_blit_state blit
;
413 GLuint prog
, fbo
, rbo
;
416 _mesa_meta_fb_tex_blit_begin(ctx
, &blit
);
417 /* XXX: Pretend to support stencil textures so _mesa_base_tex_format()
418 * returns a valid format. When we properly support the extension, we
419 * should remove this.
421 assert(ctx
->Extensions
.ARB_texture_stencil8
== false);
422 ctx
->Extensions
.ARB_texture_stencil8
= true;
424 _mesa_GenFramebuffers(1, &fbo
);
425 /* Force the surface to be configured for level zero. */
426 rbo
= brw_get_rb_for_slice(brw
, dst_mt
, 0, dst_layer
, true);
427 adjust_msaa(&dims
, dst_mt
->num_samples
);
428 adjust_tiling(&dims
, dst_mt
->num_samples
);
430 _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER
, fbo
);
431 _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER
, GL_COLOR_ATTACHMENT0
,
432 GL_RENDERBUFFER
, rbo
);
433 _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0
);
434 ctx
->DrawBuffer
->_Status
= GL_FRAMEBUFFER_COMPLETE
;
436 if (!set_read_rb_tex_image(ctx
, &blit
, &target
)) {
440 _mesa_TexParameteri(target
, GL_DEPTH_STENCIL_TEXTURE_MODE
,
443 prog
= setup_program(brw
, target
!= GL_TEXTURE_2D
);
444 setup_bounding_rect(prog
, orig_dims
);
445 setup_drawing_rect(prog
, &dims
);
446 setup_coord_transform(prog
, orig_dims
);
448 _mesa_Uniform1i(_mesa_GetUniformLocation(prog
, "dst_num_samples"),
449 dst_mt
->num_samples
);
451 prepare_vertex_data();
452 _mesa_set_viewport(ctx
, 0, dims
.dst_x0
, dims
.dst_y0
,
453 dims
.dst_x1
- dims
.dst_x0
, dims
.dst_y1
- dims
.dst_y0
);
454 _mesa_ColorMask(GL_TRUE
, GL_TRUE
, GL_TRUE
, GL_TRUE
);
455 _mesa_set_enable(ctx
, GL_DEPTH_TEST
, false);
457 _mesa_DrawArrays(GL_TRIANGLE_FAN
, 0, 4);
460 ctx
->Extensions
.ARB_texture_stencil8
= false;
461 _mesa_meta_fb_tex_blit_end(ctx
, target
, &blit
);
464 _mesa_DeleteRenderbuffers(1, &rbo
);
465 _mesa_DeleteFramebuffers(1, &fbo
);
/*
 * Blits the stencil buffer of `read_fb` into the stencil attachment of
 * `draw_fb` for the given source and destination rectangles.
 *
 * Visible sequence: look up the miptree behind the draw framebuffer's
 * stencil renderbuffer (switching to its stencil_mt when one exists), let
 * brw_meta_mirror_clip_and_scissor() adjust the rectangles and compute the
 * mirror flags, build a blit_dims from the results (the GLfloat values are
 * converted to the int members by the initializer), shift it to the
 * renderbuffer's mip level/layer with adjust_mip_level(), and run
 * brw_meta_stencil_blit() between two brw_emit_mi_flush() calls inside a
 * _mesa_meta_begin(MESA_META_ALL) section.
 * NOTE(review): the return type, braces, the statement executed when
 * brw_meta_mirror_clip_and_scissor() returns non-zero (presumably an early
 * return) and the matching end of the meta section are not present in this
 * listing.
 */
469 brw_meta_fbo_stencil_blit(struct brw_context
*brw
,
470 struct gl_framebuffer
*read_fb
,
471 struct gl_framebuffer
*draw_fb
,
472 GLfloat src_x0
, GLfloat src_y0
,
473 GLfloat src_x1
, GLfloat src_y1
,
474 GLfloat dst_x0
, GLfloat dst_y0
,
475 GLfloat dst_x1
, GLfloat dst_y1
)
477 struct gl_context
*ctx
= &brw
->ctx
;
478 struct gl_renderbuffer
*draw_rb
=
479 draw_fb
->Attachment
[BUFFER_STENCIL
].Renderbuffer
;
480 const struct intel_renderbuffer
*dst_irb
= intel_renderbuffer(draw_rb
);
481 struct intel_mipmap_tree
*dst_mt
= dst_irb
->mt
;
/* Prefer the separate stencil miptree when the miptree carries one. */
486 if (dst_mt
->stencil_mt
)
487 dst_mt
= dst_mt
->stencil_mt
;
489 bool mirror_x
, mirror_y
;
490 if (brw_meta_mirror_clip_and_scissor(ctx
, read_fb
, draw_fb
,
491 &src_x0
, &src_y0
, &src_x1
, &src_y1
,
492 &dst_x0
, &dst_y0
, &dst_x1
, &dst_y1
,
493 &mirror_x
, &mirror_y
))
496 struct blit_dims dims
= { .src_x0
= src_x0
, .src_y0
= src_y0
,
497 .src_x1
= src_x1
, .src_y1
= src_y1
,
498 .dst_x0
= dst_x0
, .dst_y0
= dst_y0
,
499 .dst_x1
= dst_x1
, .dst_y1
= dst_y1
,
500 .mirror_x
= mirror_x
, .mirror_y
= mirror_y
};
501 adjust_mip_level(dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
, &dims
);
503 brw_emit_mi_flush(brw
);
504 _mesa_meta_begin(ctx
, MESA_META_ALL
);
505 brw_meta_stencil_blit(brw
,
506 dst_mt
, dst_irb
->mt_level
, dst_irb
->mt_layer
, &dims
);
507 brw_emit_mi_flush(brw
);
/*
 * Blits the whole stencil surface `src` into `dst`.
 *
 * The blit rectangle runs from (0, 0) to each miptree's
 * logical_width0 x logical_height0 with no mirroring.  Visible sequence:
 * redirect `dst` to dst->stencil_mt, flush, enter a
 * _mesa_meta_begin(MESA_META_ALL) section, wrap level 0 / layer 0 of `src`
 * in a renderbuffer attached as the stencil attachment of a temporary read
 * framebuffer, run brw_meta_stencil_blit() for level 0 / layer 0 of `dst`,
 * flush again, and delete the temporary renderbuffer and framebuffer.
 * NOTE(review): the return type, braces, the declarations of fbo/rbo, any
 * condition guarding the stencil_mt redirection and the matching end of
 * the meta section are not present in this listing.
 */
511 brw_meta_stencil_updownsample(struct brw_context
*brw
,
512 struct intel_mipmap_tree
*src
,
513 struct intel_mipmap_tree
*dst
)
515 struct gl_context
*ctx
= &brw
->ctx
;
516 struct blit_dims dims
= {
517 .src_x0
= 0, .src_y0
= 0,
518 .src_x1
= src
->logical_width0
, .src_y1
= src
->logical_height0
,
519 .dst_x0
= 0, .dst_y0
= 0,
520 .dst_x1
= dst
->logical_width0
, .dst_y1
= dst
->logical_height0
,
521 .mirror_x
= 0, .mirror_y
= 0 };
525 dst
= dst
->stencil_mt
;
527 brw_emit_mi_flush(brw
);
528 _mesa_meta_begin(ctx
, MESA_META_ALL
);
530 _mesa_GenFramebuffers(1, &fbo
);
531 rbo
= brw_get_rb_for_slice(brw
, src
, 0, 0, false);
533 _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER
, fbo
);
534 _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER
, GL_STENCIL_ATTACHMENT
,
535 GL_RENDERBUFFER
, rbo
);
537 brw_meta_stencil_blit(brw
, dst
, 0, 0, &dims
);
538 brw_emit_mi_flush(brw
);
540 _mesa_DeleteRenderbuffers(1, &rbo
);
541 _mesa_DeleteFramebuffers(1, &fbo
);