util: Move ralloc to a new src/util directory.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_meta_stencil_blit.c
1 /*
2 * Copyright © 2014 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /**
25 * @file brw_meta_stencil_blit.c
26 *
27 * Implements upsampling, downsampling and scaling of stencil miptrees. The
28 * logic can be originally found in brw_blorp_blit.c.
29 * Implementation creates a temporary draw framebuffer object and attaches the
30 * destination stencil buffer attachment as color attachment. Source attachment
31 * is in turn treated as a stencil texture and the glsl program used for the
32 * blitting samples it using stencil-indexing.
33 *
34 * Unfortunately as the data port does not support interleaved msaa-surfaces
35 * (stencil is always IMS), the glsl program needs to handle the writing of
36 * individual samples manually. Surface is configured as if it were single
37 * sampled (with adjusted dimensions) and the glsl program extracts the
38 * sample indices from the input coordinates for correct texturing.
39 *
40 * Target surface is also configured as Y-tiled instead of W-tiled in order
41 * to support generations 6-7. Later hardware supports W-tiled as render target
42 * and the logic here could be simplified for those.
43 */
44
45 #include "brw_context.h"
46 #include "intel_batchbuffer.h"
47 #include "intel_fbo.h"
48
49 #include "main/blit.h"
50 #include "main/buffers.h"
51 #include "main/fbobject.h"
52 #include "main/uniforms.h"
53 #include "main/texparam.h"
54 #include "main/texobj.h"
55 #include "main/viewport.h"
56 #include "main/enable.h"
57 #include "main/blend.h"
58 #include "main/varray.h"
59 #include "main/shaderapi.h"
60 #include "util/ralloc.h"
61
62 #include "drivers/common/meta.h"
63 #include "brw_meta_util.h"
64
65 #define FILE_DEBUG_FLAG DEBUG_FBO
66
/**
 * Source and destination rectangles of a stencil blit together with the
 * per-axis mirroring flags.
 */
struct blit_dims {
   /* Source rectangle, (x0, y0) - (x1, y1). */
   int src_x0;
   int src_y0;
   int src_x1;
   int src_y1;

   /* Destination rectangle, (x0, y0) - (x1, y1). */
   int dst_x0;
   int dst_y0;
   int dst_x1;
   int dst_y1;

   /* True when the corresponding axis is flipped between source and
    * destination.
    */
   bool mirror_x;
   bool mirror_y;
};
72
/**
 * Pass-through vertex shader: forwards the incoming position unchanged and
 * derives texture coordinates by remapping it from [-1, 1] to [0, 1].
 */
static const char *vs_source =
   "#version 130\n"
   /* Shader interface. */
   "in vec2 position;\n"
   "out vec2 tex_coords;\n"
   /* Entry point. */
   "void main()\n"
   "{\n"
   " tex_coords = (position + 1.0) / 2.0;\n"
   " gl_Position = vec4(position, 0.0, 1.0);\n"
   "}\n";
82
/**
 * Pair of shader snippets that differ between the single-sampled and the
 * multisampled source texture: the sampler declaration and the matching
 * texel fetch statement.
 */
struct sampler_and_fetch {
   const char *sampler;
   const char *fetch;
};

/* Index 0 is the plain 2D variant, index 1 the multisampled one. */
static const struct sampler_and_fetch samplers[] = {
   [0] = {
      .sampler = "uniform usampler2D texSampler;\n",
      .fetch = " out_color = texelFetch(texSampler, txl_coords, 0)",
   },
   [1] = {
      .sampler = "#extension GL_ARB_texture_multisample : enable\n"
                 "uniform usampler2DMS texSampler;\n",
      .fetch = " out_color = texelFetch(texSampler, txl_coords, sample_index)",
   },
};
93
/**
 * Fragment shader template for the stencil blit.
 *
 * This is a printf-style format string with exactly two "%s" slots: the
 * first receives the sampler declaration and the second the texel fetch
 * statement. No other '%' may appear in the text.
 *
 * The shader converts the fragment position from the Y-tiled layout of the
 * render target into W-tiled coordinates:
 *
 *   X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1
 *   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
 *
 * It then splits off the sample index, discards fragments outside the
 * destination rectangle and finally maps the coordinates to the source.
 */
static const char *fs_tmpl =
   "#version 130\n"
   "%s"
   "uniform float src_x_scale;\n"
   "uniform float src_y_scale;\n"
   "uniform float src_x_off;\n" /* Top right coordinates of the source */
   "uniform float src_y_off;\n" /* rectangle in W-tiled space. */
   "uniform float dst_x_off;\n" /* Top right coordinates of the target */
   "uniform float dst_y_off;\n" /* rectangle in Y-tiled space. */
   "uniform float draw_rect_w;\n" /* This is the unnormalized size of the */
   "uniform float draw_rect_h;\n" /* drawing rectangle in Y-tiled space. */
   "uniform int dst_x0;\n" /* This is the bounding rectangle in the W-tiled */
   "uniform int dst_x1;\n" /* space that will be used to skip pixels lying */
   "uniform int dst_y0;\n" /* outside. In some cases the Y-tiled rectangle */
   "uniform int dst_y1;\n" /* is larger. */
   "uniform int dst_num_samples;\n"
   "in vec2 tex_coords;\n"
   "ivec2 txl_coords;\n"
   "int sample_index;\n"
   "out uvec4 out_color;\n"
   "\n"
   "void get_unorm_target_coords()\n"
   "{\n"
   " txl_coords.x = int(tex_coords.x * draw_rect_w + dst_x_off);\n"
   " txl_coords.y = int(tex_coords.y * draw_rect_h + dst_y_off);\n"
   "}\n"
   "\n"
   "void translate_dst_to_src()\n"
   "{\n"
   " txl_coords.x = int(float(txl_coords.x) * src_x_scale + src_x_off);\n"
   " txl_coords.y = int(float(txl_coords.y) * src_y_scale + src_y_off);\n"
   "}\n"
   "\n"
   "void translate_y_to_w_tiling()\n"
   "{\n"
   " int X = txl_coords.x;\n"
   " int Y = txl_coords.y;\n"
   " txl_coords.x = (X & int(0xfff4)) >> 1;\n"
   " txl_coords.x |= ((Y & int(0x1)) << 2);\n"
   " txl_coords.x |= (X & int(0x1));\n"
   " txl_coords.y = (Y & int(0xfffe)) << 1;\n"
   " txl_coords.y |= ((X & int(0x8)) >> 2);\n"
   " txl_coords.y |= ((X & int(0x2)) >> 1);\n"
   "}\n"
   "\n"
   "void decode_msaa()\n"
   "{\n"
   " int X = txl_coords.x;\n"
   " int Y = txl_coords.y;\n"
   " switch (dst_num_samples) {\n"
   " case 0:\n"
   " sample_index = 0;\n"
   " break;\n"
   " case 2:\n"
   " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
   " sample_index = (X & 0x2) >> 1;\n"
   " break;\n"
   " case 4:\n"
   " txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
   " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
   " sample_index = (Y & 0x2) | ((X & 0x2) >> 1);\n"
   " break;\n"
   " case 8:\n"
   " txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
   " txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
   " sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
   " }\n"
   "}\n"
   "\n"
   "void discard_outside_bounding_rect()\n"
   "{\n"
   " int X = txl_coords.x;\n"
   " int Y = txl_coords.y;\n"
   " if (X >= dst_x1 || X < dst_x0 || Y >= dst_y1 || Y < dst_y0)\n"
   " discard;\n"
   "}\n"
   "\n"
   "void main()\n"
   "{\n"
   " get_unorm_target_coords();\n"
   " translate_y_to_w_tiling();\n"
   /* Terminated with a newline like every other statement so that each
    * call sits on its own line of the generated shader source.
    */
   " decode_msaa();\n"
   " discard_outside_bounding_rect();\n"
   " translate_dst_to_src();\n"
   " %s;\n"
   "}\n";
186
187 /**
188 * Setup uniforms telling the coordinates of the destination rectangle in the
189 * native w-tiled space. These are needed to ignore pixels that lie outside.
190 * The destination is drawn as Y-tiled and in some cases the Y-tiled drawing
191 * rectangle is larger than the original (for example 1x4 w-tiled requires
192 * 16x2 y-tiled).
193 */
194 static void
195 setup_bounding_rect(GLuint prog, const struct blit_dims *dims)
196 {
197 _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_x0"), dims->dst_x0);
198 _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_x1"), dims->dst_x1);
199 _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_y0"), dims->dst_y0);
200 _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_y1"), dims->dst_y1);
201 }
202
203 /**
204 * Setup uniforms telling the destination width, height and the offset. These
205 * are needed to unnoormalize the input coordinates and to correctly translate
206 * between destination and source that may have differing offsets.
207 */
208 static void
209 setup_drawing_rect(GLuint prog, const struct blit_dims *dims)
210 {
211 _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "draw_rect_w"),
212 dims->dst_x1 - dims->dst_x0);
213 _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "draw_rect_h"),
214 dims->dst_y1 - dims->dst_y0);
215 _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "dst_x_off"), dims->dst_x0);
216 _mesa_Uniform1f(_mesa_GetUniformLocation(prog, "dst_y_off"), dims->dst_y0);
217 }
218
219 /**
220 * When not mirroring a coordinate (say, X), we need:
221 * src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
222 * Therefore:
223 * src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
224 *
225 * The program uses "round toward zero" to convert the transformed floating
226 * point coordinates to integer coordinates, whereas the behaviour we actually
227 * want is "round to nearest", so 0.5 provides the necessary correction.
228 *
229 * When mirroring X we need:
230 * src_x - src_x0 = dst_x1 - dst_x - 0.5
231 * Therefore:
232 * src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
233 */
234 static void
235 setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset,
236 int src_0, int src_1, int dst_0, int dst_1, bool mirror)
237 {
238 const float scale = ((float)(src_1 - src_0)) / (dst_1 - dst_0);
239
240 if (mirror) {
241 _mesa_Uniform1f(multiplier, -scale);
242 _mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale);
243 } else {
244 _mesa_Uniform1f(multiplier, scale);
245 _mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale);
246 }
247 }
248
249 /**
250 * Setup uniforms providing relation between source and destination surfaces.
251 * Destination coordinates are in Y-tiling layout while texelFetch() expects
252 * W-tiled coordinates. Once the destination coordinates are re-interpreted by
253 * the program into the original W-tiled layout, the program needs to know the
254 * offset and scaling factors between the destination and source.
255 * Note that these are calculated in the original W-tiled space before the
256 * destination rectangle is adjusted for possible msaa and Y-tiling.
257 */
258 static void
259 setup_coord_transform(GLuint prog, const struct blit_dims *dims)
260 {
261 setup_coord_coeff(prog,
262 _mesa_GetUniformLocation(prog, "src_x_scale"),
263 _mesa_GetUniformLocation(prog, "src_x_off"),
264 dims->src_x0, dims->src_x1, dims->dst_x0, dims->dst_x1,
265 dims->mirror_x);
266
267 setup_coord_coeff(prog,
268 _mesa_GetUniformLocation(prog, "src_y_scale"),
269 _mesa_GetUniformLocation(prog, "src_y_off"),
270 dims->src_y0, dims->src_y1, dims->dst_y0, dims->dst_y1,
271 dims->mirror_y);
272 }
273
274 static GLuint
275 setup_program(struct brw_context *brw, bool msaa_tex)
276 {
277 struct gl_context *ctx = &brw->ctx;
278 struct blit_state *blit = &ctx->Meta->Blit;
279 char *fs_source;
280 const struct sampler_and_fetch *sampler = &samplers[msaa_tex];
281
282 _mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, 2, 0);
283
284 GLuint *prog_id = &brw->meta_stencil_blit_programs[msaa_tex];
285
286 if (*prog_id) {
287 _mesa_UseProgram(*prog_id);
288 return *prog_id;
289 }
290
291 fs_source = ralloc_asprintf(NULL, fs_tmpl, sampler->sampler,
292 sampler->fetch);
293 _mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
294 "i965 stencil blit",
295 prog_id);
296 ralloc_free(fs_source);
297
298 return *prog_id;
299 }
300
301 /**
302 * Samples in stencil buffer are interleaved, and unfortunately the data port
303 * does not support it as render target. Therefore the surface is set up as
304 * single sampled and the program handles the interleaving.
305 * In case of single sampled stencil, the render buffer is adjusted with
306 * twice the base level height in order for the program to be able to write
307 * any mip-level. (Used to set the drawing rectangle for the hw).
308 */
309 static void
310 adjust_msaa(struct blit_dims *dims, int num_samples)
311 {
312 if (num_samples == 2) {
313 dims->dst_x0 *= 2;
314 dims->dst_x1 *= 2;
315 } else if (num_samples) {
316 const int x_num_samples = num_samples / 2;
317 dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0 * x_num_samples, num_samples);
318 dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0 * 2, 4);
319 dims->dst_x1 = ALIGN(dims->dst_x1 * x_num_samples, num_samples);
320 dims->dst_y1 = ALIGN(dims->dst_y1 * 2, 4);
321 }
322 }
323
324 /**
325 * Stencil is mapped as Y-tiled render target and the dimensions need to be
326 * adjusted in order for the Y-tiled rectangle to cover the entire linear
327 * memory space of the original W-tiled rectangle.
328 */
329 static void
330 adjust_tiling(struct blit_dims *dims, int num_samples)
331 {
332 const unsigned x_align = 8, y_align = num_samples > 2 ? 8 : 4;
333
334 dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0, x_align) * 2;
335 dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0, y_align) / 2;
336 dims->dst_x1 = ALIGN(dims->dst_x1, x_align) * 2;
337 dims->dst_y1 = ALIGN(dims->dst_y1, y_align) / 2;
338 }
339
340 /**
341 * When stencil is mapped as Y-tiled render target the mip-level offsets
342 * calculated for the Y-tiling do not always match the offsets in W-tiling.
343 * Therefore the sampling engine cannot be used for individual mip-level
344 * access but the program needs to do it internally. This can be achieved
345 * by shifting the coordinates of the blit rectangle here.
346 */
347 static void
348 adjust_mip_level(const struct intel_mipmap_tree *mt,
349 unsigned level, unsigned layer, struct blit_dims *dims)
350 {
351 unsigned x_offset;
352 unsigned y_offset;
353
354 intel_miptree_get_image_offset(mt, level, layer, &x_offset, &y_offset);
355
356 dims->dst_x0 += x_offset;
357 dims->dst_y0 += y_offset;
358 dims->dst_x1 += x_offset;
359 dims->dst_y1 += y_offset;
360 }
361
362 static void
363 prepare_vertex_data(void)
364 {
365 static const struct vertex verts[] = {
366 { .x = -1.0f, .y = -1.0f },
367 { .x = 1.0f, .y = -1.0f },
368 { .x = 1.0f, .y = 1.0f },
369 { .x = -1.0f, .y = 1.0f } };
370
371 _mesa_BufferSubData(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
372 }
373
374 static void
375 set_read_rb_tex_image(struct gl_context *ctx, struct fb_tex_blit_state *blit,
376 GLenum *target)
377 {
378 const struct gl_renderbuffer_attachment *att =
379 &ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
380 struct gl_renderbuffer *rb = att->Renderbuffer;
381 struct gl_texture_object *tex_obj;
382 unsigned level = 0;
383
384 /* If the renderbuffer is already backed by an tex image, use it. */
385 if (att->Texture) {
386 tex_obj = att->Texture;
387 *target = tex_obj->Target;
388 level = att->TextureLevel;
389 } else {
390 _mesa_meta_bind_rb_as_tex_image(ctx, rb, &blit->tempTex, &tex_obj,
391 target);
392 }
393
394 blit->baseLevelSave = tex_obj->BaseLevel;
395 blit->maxLevelSave = tex_obj->MaxLevel;
396 blit->stencilSamplingSave = tex_obj->StencilSampling;
397 blit->sampler = _mesa_meta_setup_sampler(ctx, tex_obj, *target,
398 GL_NEAREST, level);
399 }
400
401 static void
402 brw_meta_stencil_blit(struct brw_context *brw,
403 struct intel_mipmap_tree *dst_mt,
404 unsigned dst_level, unsigned dst_layer,
405 const struct blit_dims *orig_dims)
406 {
407 struct gl_context *ctx = &brw->ctx;
408 struct blit_dims dims = *orig_dims;
409 struct fb_tex_blit_state blit;
410 GLuint prog, fbo, rbo;
411 GLenum target;
412
413 _mesa_meta_fb_tex_blit_begin(ctx, &blit);
414
415 _mesa_GenFramebuffers(1, &fbo);
416 /* Force the surface to be configured for level zero. */
417 rbo = brw_get_rb_for_slice(brw, dst_mt, 0, dst_layer, true);
418 adjust_msaa(&dims, dst_mt->num_samples);
419 adjust_tiling(&dims, dst_mt->num_samples);
420
421 _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
422 _mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
423 GL_RENDERBUFFER, rbo);
424 _mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);
425 ctx->DrawBuffer->_Status = GL_FRAMEBUFFER_COMPLETE;
426
427 set_read_rb_tex_image(ctx, &blit, &target);
428
429 _mesa_TexParameteri(target, GL_DEPTH_STENCIL_TEXTURE_MODE,
430 GL_STENCIL_INDEX);
431
432 prog = setup_program(brw, target != GL_TEXTURE_2D);
433 setup_bounding_rect(prog, orig_dims);
434 setup_drawing_rect(prog, &dims);
435 setup_coord_transform(prog, orig_dims);
436
437 _mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_num_samples"),
438 dst_mt->num_samples);
439
440 prepare_vertex_data();
441 _mesa_set_viewport(ctx, 0, dims.dst_x0, dims.dst_y0,
442 dims.dst_x1 - dims.dst_x0, dims.dst_y1 - dims.dst_y0);
443 _mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
444 _mesa_set_enable(ctx, GL_DEPTH_TEST, false);
445
446 _mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
447
448 _mesa_meta_fb_tex_blit_end(ctx, target, &blit);
449 _mesa_meta_end(ctx);
450
451 _mesa_DeleteRenderbuffers(1, &rbo);
452 _mesa_DeleteFramebuffers(1, &fbo);
453 }
454
455 void
456 brw_meta_fbo_stencil_blit(struct brw_context *brw,
457 GLfloat src_x0, GLfloat src_y0,
458 GLfloat src_x1, GLfloat src_y1,
459 GLfloat dst_x0, GLfloat dst_y0,
460 GLfloat dst_x1, GLfloat dst_y1)
461 {
462 struct gl_context *ctx = &brw->ctx;
463 struct gl_renderbuffer *draw_fb =
464 ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
465 const struct intel_renderbuffer *dst_irb = intel_renderbuffer(draw_fb);
466 struct intel_mipmap_tree *dst_mt = dst_irb->mt;
467
468 if (!dst_mt)
469 return;
470
471 if (dst_mt->stencil_mt)
472 dst_mt = dst_mt->stencil_mt;
473
474 bool mirror_x, mirror_y;
475 if (brw_meta_mirror_clip_and_scissor(ctx,
476 &src_x0, &src_y0, &src_x1, &src_y1,
477 &dst_x0, &dst_y0, &dst_x1, &dst_y1,
478 &mirror_x, &mirror_y))
479 return;
480
481 struct blit_dims dims = { .src_x0 = src_x0, .src_y0 = src_y0,
482 .src_x1 = src_x1, .src_y1 = src_y1,
483 .dst_x0 = dst_x0, .dst_y0 = dst_y0,
484 .dst_x1 = dst_x1, .dst_y1 = dst_y1,
485 .mirror_x = mirror_x, .mirror_y = mirror_y };
486 adjust_mip_level(dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
487
488 intel_batchbuffer_emit_mi_flush(brw);
489 _mesa_meta_begin(ctx, MESA_META_ALL);
490 brw_meta_stencil_blit(brw,
491 dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
492 intel_batchbuffer_emit_mi_flush(brw);
493 }
494
495 void
496 brw_meta_stencil_updownsample(struct brw_context *brw,
497 struct intel_mipmap_tree *src,
498 struct intel_mipmap_tree *dst)
499 {
500 struct gl_context *ctx = &brw->ctx;
501 struct blit_dims dims = {
502 .src_x0 = 0, .src_y0 = 0,
503 .src_x1 = src->logical_width0, .src_y1 = src->logical_height0,
504 .dst_x0 = 0, .dst_y0 = 0,
505 .dst_x1 = dst->logical_width0, .dst_y1 = dst->logical_height0,
506 .mirror_x = 0, .mirror_y = 0 };
507 GLuint fbo, rbo;
508
509 if (dst->stencil_mt)
510 dst = dst->stencil_mt;
511
512 intel_batchbuffer_emit_mi_flush(brw);
513 _mesa_meta_begin(ctx, MESA_META_ALL);
514
515 _mesa_GenFramebuffers(1, &fbo);
516 rbo = brw_get_rb_for_slice(brw, src, 0, 0, false);
517
518 _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbo);
519 _mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT,
520 GL_RENDERBUFFER, rbo);
521
522 brw_meta_stencil_blit(brw, dst, 0, 0, &dims);
523 intel_batchbuffer_emit_mi_flush(brw);
524
525 _mesa_DeleteRenderbuffers(1, &rbo);
526 _mesa_DeleteFramebuffers(1, &fbo);
527 }