i965: Use the brw_context for the clear color and value setters
src/mesa/drivers/dri/i965/brw_blorp.c
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "main/context.h"
#include "main/teximage.h"
#include "main/blend.h"
#include "main/bufferobj.h"
#include "main/enums.h"
#include "main/fbobject.h"
#include "main/image.h"
#include "main/renderbuffer.h"
#include "main/glformats.h"

#include "brw_blorp.h"
#include "brw_context.h"
#include "brw_defines.h"
#include "brw_meta_util.h"
#include "brw_state.h"
#include "intel_buffer_objects.h"
#include "intel_fbo.h"
#include "common/gen_debug.h"

#define FILE_DEBUG_FLAG DEBUG_BLORP

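/* Callback used by blorp to look up a previously-compiled blorp shader in
 * the driver's program cache. Returns true and fills out the kernel offset
 * and prog_data pointer on a cache hit.
 */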
static bool
brw_blorp_lookup_shader(struct blorp_context *blorp,
                        const void *key, uint32_t key_size,
                        uint32_t *kernel_out, void *prog_data_out)
{
   struct brw_context *brw = blorp->driver_ctx;
   return brw_search_cache(&brw->cache, BRW_CACHE_BLORP_PROG,
                           key, key_size, kernel_out, prog_data_out);
}

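/* Callback used by blorp to upload a freshly-compiled shader into the
 * program cache. The upload always succeeds, so this unconditionally
 * returns true.
 */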
static bool
brw_blorp_upload_shader(struct blorp_context *blorp,
                        const void *key, uint32_t key_size,
                        const void *kernel, uint32_t kernel_size,
                        const struct brw_stage_prog_data *prog_data,
                        uint32_t prog_data_size,
                        uint32_t *kernel_out, void *prog_data_out)
{
   struct brw_context *brw = blorp->driver_ctx;
   brw_upload_cache(&brw->cache, BRW_CACHE_BLORP_PROG, key, key_size,
                    kernel, kernel_size, prog_data, prog_data_size,
                    kernel_out, prog_data_out);
   return true;
}

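/* One-time initialization of the driver's blorp context: picks the
 * generation-specific batch emission function and wires up the shader
 * cache callbacks above.
 */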
void
brw_blorp_init(struct brw_context *brw)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   blorp_init(&brw->blorp, brw, &brw->isl_dev);

   brw->blorp.compiler = brw->screen->compiler;

   switch (devinfo->gen) {
   case 4:
      if (devinfo->is_g4x) {
         brw->blorp.exec = gen45_blorp_exec;
      } else {
         brw->blorp.exec = gen4_blorp_exec;
      }
      break;
   case 5:
      brw->blorp.exec = gen5_blorp_exec;
      break;
   case 6:
      brw->blorp.exec = gen6_blorp_exec;
      break;
   case 7:
      if (devinfo->is_haswell) {
         brw->blorp.exec = gen75_blorp_exec;
      } else {
         brw->blorp.exec = gen7_blorp_exec;
      }
      break;
   case 8:
      brw->blorp.exec = gen8_blorp_exec;
      break;
   case 9:
      brw->blorp.exec = gen9_blorp_exec;
      break;
   case 10:
      brw->blorp.exec = gen10_blorp_exec;
      break;
   case 11:
      brw->blorp.exec = gen11_blorp_exec;
      break;
   default:
      unreachable("Invalid gen");
   }

   brw->blorp.lookup_shader = brw_blorp_lookup_shader;
   brw->blorp.upload_shader = brw_blorp_upload_shader;
}

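/* Fill out a blorp_surf for the given miptree slice range. Translates the
 * miptree's BO, auxiliary buffer, and fast clear color into the form blorp
 * expects, and rebases *level relative to mt->first_level since ISL wants
 * real levels, not offset ones.
 */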
static void
blorp_surf_for_miptree(struct brw_context *brw,
                       struct blorp_surf *surf,
                       struct intel_mipmap_tree *mt,
                       enum isl_aux_usage aux_usage,
                       bool is_render_target,
                       unsigned *level,
                       unsigned start_layer, unsigned num_layers,
                       struct isl_surf tmp_surfs[1])
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   if (mt->surf.msaa_layout == ISL_MSAA_LAYOUT_ARRAY) {
      const unsigned num_samples = mt->surf.samples;
      for (unsigned i = 0; i < num_layers; i++) {
         for (unsigned s = 0; s < num_samples; s++) {
            const unsigned phys_layer = (start_layer + i) * num_samples + s;
            intel_miptree_check_level_layer(mt, *level, phys_layer);
         }
      }
   } else {
      for (unsigned i = 0; i < num_layers; i++)
         intel_miptree_check_level_layer(mt, *level, start_layer + i);
   }

   *surf = (struct blorp_surf) {
      .surf = &mt->surf,
      .addr = (struct blorp_address) {
         .buffer = mt->bo,
         .offset = mt->offset,
         .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
         .mocs = brw_get_bo_mocs(devinfo, mt->bo),
      },
      .aux_usage = aux_usage,
   };

   struct intel_miptree_aux_buffer *aux_buf = NULL;
   if (mt->mcs_buf)
      aux_buf = mt->mcs_buf;
   else if (mt->hiz_buf)
      aux_buf = mt->hiz_buf;

   if (mt->format == MESA_FORMAT_S_UINT8 && is_render_target &&
       devinfo->gen <= 7)
      mt->r8stencil_needs_update = true;

   if (surf->aux_usage == ISL_AUX_USAGE_HIZ &&
       !intel_miptree_level_has_hiz(mt, *level))
      surf->aux_usage = ISL_AUX_USAGE_NONE;

   if (surf->aux_usage != ISL_AUX_USAGE_NONE) {
      /* We only really need a clear color if we also have an auxiliary
       * surface. Without one, it does nothing.
       */
      surf->clear_color = mt->fast_clear_color;

      surf->aux_surf = &aux_buf->surf;
      surf->aux_addr = (struct blorp_address) {
         .reloc_flags = is_render_target ? EXEC_OBJECT_WRITE : 0,
         .mocs = surf->addr.mocs,
      };

      surf->aux_addr.buffer = aux_buf->bo;
      surf->aux_addr.offset = aux_buf->offset;

      if (devinfo->gen >= 10) {
         surf->clear_color_addr = (struct blorp_address) {
            .buffer = aux_buf->clear_color_bo,
            .offset = aux_buf->clear_color_offset,
         };
      }
   } else {
      surf->aux_addr = (struct blorp_address) {
         .buffer = NULL,
      };
      memset(&surf->clear_color, 0, sizeof(surf->clear_color));
   }
   assert((surf->aux_usage == ISL_AUX_USAGE_NONE) ==
          (surf->aux_addr.buffer == NULL));

   /* ISL wants real levels, not offset ones. */
   *level -= mt->first_level;
}

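/* Map a mesa_format to the isl_format blorp should use for it. Depth and
 * stencil formats are re-interpreted as equivalently-sized color formats,
 * since blorp internally treats every surface as a color surface.
 */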
static enum isl_format
brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
                        bool is_render_target)
{
   switch (format) {
   case MESA_FORMAT_NONE:
      return ISL_FORMAT_UNSUPPORTED;
   case MESA_FORMAT_S_UINT8:
      return ISL_FORMAT_R8_UINT;
   case MESA_FORMAT_Z24_UNORM_X8_UINT:
   case MESA_FORMAT_Z24_UNORM_S8_UINT:
      return ISL_FORMAT_R24_UNORM_X8_TYPELESS;
   case MESA_FORMAT_Z_FLOAT32:
   case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
      return ISL_FORMAT_R32_FLOAT;
   case MESA_FORMAT_Z_UNORM16:
      return ISL_FORMAT_R16_UNORM;
   default:
      if (is_render_target) {
         assert(brw->mesa_format_supports_render[format]);
         return brw->mesa_to_isl_render_format[format];
      } else {
         return brw_isl_format_for_mesa_format(format);
      }
   }
}

/**
 * Convert a swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
 * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are
 *
 * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
 *         0          1          2          3             4            5
 *         4          5          6          7             0            1
 *   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA,     SCS_ZERO,     SCS_ONE
 *
 * which is simply adding 4 then modding by 8 (or ANDing with 7).
 *
 * We then may need to apply workarounds for textureGather hardware bugs.
 */
static enum isl_channel_select
swizzle_to_scs(GLenum swizzle)
{
   return (enum isl_channel_select)((swizzle + 4) & 7);
}

/**
 * Note: if the src (or dst) is a 2D multisample array texture on Gen7+ using
 * INTEL_MSAA_LAYOUT_UMS or INTEL_MSAA_LAYOUT_CMS, src_layer (dst_layer) is
 * the physical layer holding sample 0. So, for example, if
 * src_mt->surf.samples == 4, then logical layer n corresponds to src_layer ==
 * 4*n.
 */
void
brw_blorp_blit_miptrees(struct brw_context *brw,
                        struct intel_mipmap_tree *src_mt,
                        unsigned src_level, unsigned src_layer,
                        mesa_format src_format, int src_swizzle,
                        struct intel_mipmap_tree *dst_mt,
                        unsigned dst_level, unsigned dst_layer,
                        mesa_format dst_format,
                        float src_x0, float src_y0,
                        float src_x1, float src_y1,
                        float dst_x0, float dst_y0,
                        float dst_x1, float dst_y1,
                        GLenum filter, bool mirror_x, bool mirror_y,
                        bool decode_srgb, bool encode_srgb)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   DBG("%s from %dx %s mt %p %d %d (%f,%f) (%f,%f) "
       "to %dx %s mt %p %d %d (%f,%f) (%f,%f) (flip %d,%d)\n",
       __func__,
       src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt,
       src_level, src_layer, src_x0, src_y0, src_x1, src_y1,
       dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt,
       dst_level, dst_layer, dst_x0, dst_y0, dst_x1, dst_y1,
       mirror_x, mirror_y);

   if (!decode_srgb && _mesa_get_format_color_encoding(src_format) == GL_SRGB)
      src_format = _mesa_get_srgb_format_linear(src_format);

   if (!encode_srgb && _mesa_get_format_color_encoding(dst_format) == GL_SRGB)
      dst_format = _mesa_get_srgb_format_linear(dst_format);

   /* When doing a multisample resolve of a GL_LUMINANCE32F or GL_INTENSITY32F
    * texture, the above code configures the source format for L32_FLOAT or
    * I32_FLOAT, and the destination format for R32_FLOAT. On Sandy Bridge,
    * the SAMPLE message appears to handle multisampled L32_FLOAT and
    * I32_FLOAT textures incorrectly, resulting in blocky artifacts. So work
    * around the problem by using a source format of R32_FLOAT. This
    * shouldn't affect rendering correctness, since the destination format is
    * R32_FLOAT, so only the contents of the red channel matter.
    */
   if (devinfo->gen == 6 &&
       src_mt->surf.samples > 1 && dst_mt->surf.samples <= 1 &&
       src_mt->format == dst_mt->format &&
       (dst_format == MESA_FORMAT_L_FLOAT32 ||
        dst_format == MESA_FORMAT_I_FLOAT32)) {
      src_format = dst_format = MESA_FORMAT_R_FLOAT32;
   }

   enum isl_format src_isl_format =
      brw_blorp_to_isl_format(brw, src_format, false);
   enum isl_aux_usage src_aux_usage =
      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
   /* We do format workarounds for some depth formats so we can't reliably
    * sample with HiZ. One of these days, we should fix that.
    */
   if (src_aux_usage == ISL_AUX_USAGE_HIZ)
      src_aux_usage = ISL_AUX_USAGE_NONE;
   const bool src_clear_supported =
      src_aux_usage != ISL_AUX_USAGE_NONE && src_mt->format == src_format;
   intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
                                src_aux_usage, src_clear_supported);

   enum isl_format dst_isl_format =
      brw_blorp_to_isl_format(brw, dst_format, true);
   enum isl_aux_usage dst_aux_usage =
      intel_miptree_render_aux_usage(brw, dst_mt, dst_isl_format,
                                     false, false);
   const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE;
   intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
                                dst_aux_usage, dst_clear_supported);

   struct isl_surf tmp_surfs[2];
   struct blorp_surf src_surf, dst_surf;
   blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false,
                          &src_level, src_layer, 1, &tmp_surfs[0]);
   blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true,
                          &dst_level, dst_layer, 1, &tmp_surfs[1]);

   struct isl_swizzle src_isl_swizzle = {
      .r = swizzle_to_scs(GET_SWZ(src_swizzle, 0)),
      .g = swizzle_to_scs(GET_SWZ(src_swizzle, 1)),
      .b = swizzle_to_scs(GET_SWZ(src_swizzle, 2)),
      .a = swizzle_to_scs(GET_SWZ(src_swizzle, 3)),
   };

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_blit(&batch, &src_surf, src_level, src_layer,
              src_isl_format, src_isl_swizzle,
              &dst_surf, dst_level, dst_layer,
              dst_isl_format, ISL_SWIZZLE_IDENTITY,
              src_x0, src_y0, src_x1, src_y1,
              dst_x0, dst_y0, dst_x1, dst_y1,
              filter, mirror_x, mirror_y);
   blorp_batch_finish(&batch);

   intel_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1,
                              dst_aux_usage);
}

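/* Raw memcpy-style copy between two miptree slices using blorp_copy. Unlike
 * brw_blorp_blit_miptrees, this reinterprets both surfaces as compatible
 * integer formats, so no filtering or format conversion takes place.
 */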
void
brw_blorp_copy_miptrees(struct brw_context *brw,
                        struct intel_mipmap_tree *src_mt,
                        unsigned src_level, unsigned src_layer,
                        struct intel_mipmap_tree *dst_mt,
                        unsigned dst_level, unsigned dst_layer,
                        unsigned src_x, unsigned src_y,
                        unsigned dst_x, unsigned dst_y,
                        unsigned src_width, unsigned src_height)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;

   DBG("%s from %dx %s mt %p %d %d (%d,%d) %dx%d "
       "to %dx %s mt %p %d %d (%d,%d)\n",
       __func__,
       src_mt->surf.samples, _mesa_get_format_name(src_mt->format), src_mt,
       src_level, src_layer, src_x, src_y, src_width, src_height,
       dst_mt->surf.samples, _mesa_get_format_name(dst_mt->format), dst_mt,
       dst_level, dst_layer, dst_x, dst_y);

   enum isl_aux_usage src_aux_usage, dst_aux_usage;
   bool src_clear_supported, dst_clear_supported;

   switch (src_mt->aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_E:
      src_aux_usage = src_mt->aux_usage;
      /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since
       * we're going to re-interpret the format as an integer format possibly
       * with a different number of components, we can't handle clear colors
       * until gen9.
       */
      src_clear_supported = devinfo->gen >= 9;
      break;
   default:
      src_aux_usage = ISL_AUX_USAGE_NONE;
      src_clear_supported = false;
      break;
   }

   switch (dst_mt->aux_usage) {
   case ISL_AUX_USAGE_MCS:
   case ISL_AUX_USAGE_CCS_E:
      dst_aux_usage = dst_mt->aux_usage;
      /* Prior to gen9, fast-clear only supported 0/1 clear colors. Since
       * we're going to re-interpret the format as an integer format possibly
       * with a different number of components, we can't handle clear colors
       * until gen9.
       */
      dst_clear_supported = devinfo->gen >= 9;
      break;
   default:
      dst_aux_usage = ISL_AUX_USAGE_NONE;
      dst_clear_supported = false;
      break;
   }

   intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
                                src_aux_usage, src_clear_supported);
   intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
                                dst_aux_usage, dst_clear_supported);

   struct isl_surf tmp_surfs[2];
   struct blorp_surf src_surf, dst_surf;
   blorp_surf_for_miptree(brw, &src_surf, src_mt, src_aux_usage, false,
                          &src_level, src_layer, 1, &tmp_surfs[0]);
   blorp_surf_for_miptree(brw, &dst_surf, dst_mt, dst_aux_usage, true,
                          &dst_level, dst_layer, 1, &tmp_surfs[1]);

   /* The hardware seems to have issues with having two different format
    * views of the same texture in the sampler cache at the same time. It's
    * unclear exactly what the issue is but it hurts glCopyImageSubData
    * particularly badly because it does a lot of format reinterprets. We
    * badly need better understanding of the issue and a better fix but this
    * works for now and fixes CTS tests.
    *
    * TODO: Remove this hack!
    */
   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL |
                                    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_copy(&batch, &src_surf, src_level, src_layer,
              &dst_surf, dst_level, dst_layer,
              src_x, src_y, dst_x, dst_y, src_width, src_height);
   blorp_batch_finish(&batch);

   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL |
                                    PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);

   intel_miptree_finish_write(brw, dst_mt, dst_level, dst_layer, 1,
                              dst_aux_usage);
}

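/* Copy a range of bytes between two buffer objects using blorp. This is the
 * BO-to-BO analogue of brw_blorp_copy_miptrees.
 */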
void
brw_blorp_copy_buffers(struct brw_context *brw,
                       struct brw_bo *src_bo,
                       unsigned src_offset,
                       struct brw_bo *dst_bo,
                       unsigned dst_offset,
                       unsigned size)
{
   DBG("%s %d bytes from %p[%d] to %p[%d]\n",
       __func__, size, src_bo, src_offset, dst_bo, dst_offset);

   struct blorp_batch batch;
   struct blorp_address src = { .buffer = src_bo, .offset = src_offset };
   struct blorp_address dst = { .buffer = dst_bo, .offset = dst_offset };

   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_buffer_copy(&batch, src, dst, size);
   blorp_batch_finish(&batch);
}

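/* Return the miptree that actually backs the given renderbuffer for the
 * given buffer bit, following the separate stencil miptree if needed.
 */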
static struct intel_mipmap_tree *
find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb)
{
   struct intel_mipmap_tree *mt = irb->mt;
   if (buffer_bit == GL_STENCIL_BUFFER_BIT && mt->stencil_mt)
      mt = mt->stencil_mt;
   return mt;
}

static int
blorp_get_texture_swizzle(const struct intel_renderbuffer *irb)
{
   return irb->Base.Base._BaseFormat == GL_RGB ?
      MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_ONE) :
      SWIZZLE_XYZW;
}

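/* Thin wrapper around brw_blorp_blit_miptrees for blitting between two
 * renderbuffers, applying sRGB decode/encode according to the current
 * GL framebuffer sRGB state.
 */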
static void
do_blorp_blit(struct brw_context *brw, GLbitfield buffer_bit,
              struct intel_renderbuffer *src_irb, mesa_format src_format,
              struct intel_renderbuffer *dst_irb, mesa_format dst_format,
              GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
              GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
              GLenum filter, bool mirror_x, bool mirror_y)
{
   const struct gl_context *ctx = &brw->ctx;

   /* Find source/dst miptrees */
   struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
   struct intel_mipmap_tree *dst_mt = find_miptree(buffer_bit, dst_irb);

   const bool do_srgb = ctx->Color.sRGBEnabled;

   /* Do the blit */
   brw_blorp_blit_miptrees(brw,
                           src_mt, src_irb->mt_level, src_irb->mt_layer,
                           src_format, blorp_get_texture_swizzle(src_irb),
                           dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
                           dst_format,
                           srcX0, srcY0, srcX1, srcY1,
                           dstX0, dstY0, dstX1, dstY1,
                           filter, mirror_x, mirror_y,
                           do_srgb, do_srgb);

   dst_irb->need_downsample = true;
}

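/* Attempt to implement a glBlitFramebuffer of the given buffer_bit with
 * blorp. Returns false if the blit cannot be handled, in which case the
 * caller is expected to fall back to another path.
 */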
static bool
try_blorp_blit(struct brw_context *brw,
               const struct gl_framebuffer *read_fb,
               const struct gl_framebuffer *draw_fb,
               GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
               GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
               GLenum filter, GLbitfield buffer_bit)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;

   /* Sync up the state of window system buffers. We need to do this before
    * we go looking for the buffers.
    */
   intel_prepare_render(brw);

   bool mirror_x, mirror_y;
   if (brw_meta_mirror_clip_and_scissor(ctx, read_fb, draw_fb,
                                        &srcX0, &srcY0, &srcX1, &srcY1,
                                        &dstX0, &dstY0, &dstX1, &dstY1,
                                        &mirror_x, &mirror_y))
      return true;

   /* Find buffers */
   struct intel_renderbuffer *src_irb;
   struct intel_renderbuffer *dst_irb;
   struct intel_mipmap_tree *src_mt;
   struct intel_mipmap_tree *dst_mt;
   switch (buffer_bit) {
   case GL_COLOR_BUFFER_BIT:
      src_irb = intel_renderbuffer(read_fb->_ColorReadBuffer);
      for (unsigned i = 0; i < draw_fb->_NumColorDrawBuffers; ++i) {
         dst_irb = intel_renderbuffer(draw_fb->_ColorDrawBuffers[i]);
         if (dst_irb)
            do_blorp_blit(brw, buffer_bit,
                          src_irb, src_irb->Base.Base.Format,
                          dst_irb, dst_irb->Base.Base.Format,
                          srcX0, srcY0, srcX1, srcY1,
                          dstX0, dstY0, dstX1, dstY1,
                          filter, mirror_x, mirror_y);
      }
      break;
   case GL_DEPTH_BUFFER_BIT:
      src_irb =
         intel_renderbuffer(read_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
      dst_irb =
         intel_renderbuffer(draw_fb->Attachment[BUFFER_DEPTH].Renderbuffer);
      src_mt = find_miptree(buffer_bit, src_irb);
      dst_mt = find_miptree(buffer_bit, dst_irb);

      /* We can't handle format conversions between Z24 and other formats
       * since we have to lie about the surface format. See the comments in
       * brw_blorp_surface_info::set().
       */
      if ((src_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) !=
          (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT))
         return false;

      /* We also can't handle any combined depth-stencil formats because we
       * have to reinterpret as a color format.
       */
      if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL ||
          _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL)
         return false;

      do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE,
                    dst_irb, MESA_FORMAT_NONE, srcX0, srcY0,
                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
                    filter, mirror_x, mirror_y);
      break;
   case GL_STENCIL_BUFFER_BIT:
      /* Blorp doesn't support combined depth stencil, which is all we have
       * prior to gen6.
       */
      if (devinfo->gen < 6)
         return false;

      src_irb =
         intel_renderbuffer(read_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
      dst_irb =
         intel_renderbuffer(draw_fb->Attachment[BUFFER_STENCIL].Renderbuffer);
      do_blorp_blit(brw, buffer_bit, src_irb, MESA_FORMAT_NONE,
                    dst_irb, MESA_FORMAT_NONE, srcX0, srcY0,
                    srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
                    filter, mirror_x, mirror_y);
      break;
   default:
      unreachable("not reached");
   }

   return true;
}

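/* Flip a vertical range within a surface of the given height, used when
 * blitting to or from window-system buffers whose origin is at the lower
 * left.
 */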
static void
apply_y_flip(int *y0, int *y1, int height)
{
   int tmp = height - *y0;
   *y0 = height - *y1;
   *y1 = tmp;
}

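/* Implement glCopyTexSubImage with a blorp blit from the source
 * renderbuffer to the destination texture image. Returns false if any
 * restriction prevents using blorp, so the caller can fall back.
 */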
bool
brw_blorp_copytexsubimage(struct brw_context *brw,
                          struct gl_renderbuffer *src_rb,
                          struct gl_texture_image *dst_image,
                          int slice,
                          int srcX0, int srcY0,
                          int dstX0, int dstY0,
                          int width, int height)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *src_irb = intel_renderbuffer(src_rb);
   struct intel_texture_image *intel_image = intel_texture_image(dst_image);

   /* No pixel transfer operations (zoom, bias, mapping), just a blit */
   if (brw->ctx._ImageTransferState)
      return false;

   /* Sync up the state of window system buffers. We need to do this before
    * we go looking at the src renderbuffer's miptree.
    */
   intel_prepare_render(brw);

   struct intel_mipmap_tree *src_mt = src_irb->mt;
   struct intel_mipmap_tree *dst_mt = intel_image->mt;

   /* There is support for only up to eight samples. */
   if (src_mt->surf.samples > 8 || dst_mt->surf.samples > 8)
      return false;

   if (_mesa_get_format_base_format(src_rb->Format) !=
       _mesa_get_format_base_format(dst_image->TexFormat)) {
      return false;
   }

   /* We can't handle format conversions between Z24 and other formats since
    * we have to lie about the surface format. See the comments in
    * brw_blorp_surface_info::set().
    */
   if ((src_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT) !=
       (dst_mt->format == MESA_FORMAT_Z24_UNORM_X8_UINT)) {
      return false;
   }

   /* We also can't handle any combined depth-stencil formats because we
    * have to reinterpret as a color format.
    */
   if (_mesa_get_format_base_format(src_mt->format) == GL_DEPTH_STENCIL ||
       _mesa_get_format_base_format(dst_mt->format) == GL_DEPTH_STENCIL)
      return false;

   if (!brw->mesa_format_supports_render[dst_image->TexFormat])
      return false;

   /* Source clipping shouldn't be necessary, since copytexsubimage (in
    * src/mesa/main/teximage.c) calls _mesa_clip_copytexsubimage() which
    * takes care of it.
    *
    * Destination clipping shouldn't be necessary since the restrictions on
    * glCopyTexSubImage prevent the user from specifying a destination
    * rectangle that falls outside the bounds of the destination texture.
    * See error_check_subtexture_dimensions().
    */

   int srcY1 = srcY0 + height;
   int srcX1 = srcX0 + width;
   int dstX1 = dstX0 + width;
   int dstY1 = dstY0 + height;

   /* Account for the fact that in the system framebuffer, the origin is at
    * the lower left.
    */
   bool mirror_y = _mesa_is_winsys_fbo(ctx->ReadBuffer);
   if (mirror_y)
      apply_y_flip(&srcY0, &srcY1, src_rb->Height);

   /* Account for face selection and texture view MinLayer */
   int dst_slice = slice + dst_image->TexObject->MinLayer + dst_image->Face;
   int dst_level = dst_image->Level + dst_image->TexObject->MinLevel;

   brw_blorp_blit_miptrees(brw,
                           src_mt, src_irb->mt_level, src_irb->mt_layer,
                           src_rb->Format, blorp_get_texture_swizzle(src_irb),
                           dst_mt, dst_level, dst_slice,
                           dst_image->TexFormat,
                           srcX0, srcY0, srcX1, srcY1,
                           dstX0, dstY0, dstX1, dstY1,
                           GL_NEAREST, false, mirror_y,
                           false, false);

   /* If we're copying to a packed depth stencil texture and the source
    * framebuffer has separate stencil, we need to also copy the stencil data
    * over.
    */
   src_rb = ctx->ReadBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
   if (_mesa_get_format_bits(dst_image->TexFormat, GL_STENCIL_BITS) > 0 &&
       src_rb != NULL) {
      src_irb = intel_renderbuffer(src_rb);
      src_mt = src_irb->mt;

      if (src_mt->stencil_mt)
         src_mt = src_mt->stencil_mt;
      if (dst_mt->stencil_mt)
         dst_mt = dst_mt->stencil_mt;

      if (src_mt != dst_mt) {
         brw_blorp_blit_miptrees(brw,
                                 src_mt, src_irb->mt_level, src_irb->mt_layer,
                                 src_mt->format,
                                 blorp_get_texture_swizzle(src_irb),
                                 dst_mt, dst_level, dst_slice,
                                 dst_mt->format,
                                 srcX0, srcY0, srcX1, srcY1,
                                 dstX0, dstY0, dstX1, dstY1,
                                 GL_NEAREST, false, mirror_y,
                                 false, false);
      }
   }

   return true;
}

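/* Top-level glBlitFramebuffer hook: try each requested buffer with blorp
 * and return the bitfield of buffers that still need to be handled by a
 * fallback path.
 */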
GLbitfield
brw_blorp_framebuffer(struct brw_context *brw,
                      struct gl_framebuffer *readFb,
                      struct gl_framebuffer *drawFb,
                      GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
                      GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
                      GLbitfield mask, GLenum filter)
{
   static GLbitfield buffer_bits[] = {
      GL_COLOR_BUFFER_BIT,
      GL_DEPTH_BUFFER_BIT,
      GL_STENCIL_BUFFER_BIT,
   };

   for (unsigned int i = 0; i < ARRAY_SIZE(buffer_bits); ++i) {
      if ((mask & buffer_bits[i]) &&
          try_blorp_blit(brw, readFb, drawFb,
                         srcX0, srcY0, srcX1, srcY1,
                         dstX0, dstY0, dstX1, dstY1,
                         filter, buffer_bits[i])) {
         mask &= ~buffer_bits[i];
      }
   }

   return mask;
}

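/* Get a BO containing the client's pixel data for a PBO or CPU transfer.
 * For a bound PBO this returns a reference to the underlying BO; otherwise
 * it allocates a temporary BO and copies the user data into it (upload
 * only). Returns NULL to request a fallback to CPU mapping.
 */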
static struct brw_bo *
blorp_get_client_bo(struct brw_context *brw,
                    unsigned w, unsigned h, unsigned d,
                    GLenum target, GLenum format, GLenum type,
                    const void *pixels,
                    const struct gl_pixelstore_attrib *packing,
                    uint32_t *offset_out, uint32_t *row_stride_out,
                    uint32_t *image_stride_out, bool read_only)
{
   /* Account for SKIP_PIXELS, SKIP_ROWS, ALIGNMENT, and SKIP_IMAGES */
   const GLuint dims = _mesa_get_texture_dimensions(target);
   const uint32_t first_pixel = _mesa_image_offset(dims, packing, w, h,
                                                   format, type, 0, 0, 0);
   const uint32_t last_pixel = _mesa_image_offset(dims, packing, w, h,
                                                  format, type,
                                                  d - 1, h - 1, w);
   const uint32_t stride = _mesa_image_row_stride(packing, w, format, type);
   const uint32_t cpp = _mesa_bytes_per_pixel(format, type);
   const uint32_t size = last_pixel - first_pixel;

   *row_stride_out = stride;
   *image_stride_out = _mesa_image_image_stride(packing, w, h, format, type);

   if (_mesa_is_bufferobj(packing->BufferObj)) {
      const uint32_t offset = first_pixel + (intptr_t)pixels;
      if (!read_only && ((offset % cpp) || (stride % cpp))) {
         perf_debug("Bad PBO alignment; fallback to CPU mapping\n");
         return NULL;
      }

      /* This is a user-provided PBO. We just need to get the BO out */
      struct intel_buffer_object *intel_pbo =
         intel_buffer_object(packing->BufferObj);
      struct brw_bo *bo =
         intel_bufferobj_buffer(brw, intel_pbo, offset, size, !read_only);

      /* We take a reference to the BO so that the caller can just always
       * unref without having to worry about whether it's a user PBO or one
       * we created.
       */
      brw_bo_reference(bo);

      *offset_out = offset;
      return bo;
   } else {
      /* Someone should have already checked that there is data to upload. */
      assert(pixels);

      /* Creating a temp buffer currently only works for upload */
      assert(read_only);

      /* This is not a user-provided PBO. Instead, pixels is a pointer to CPU
       * data which we need to copy into a BO.
       */
      struct brw_bo *bo =
         brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size);
      if (bo == NULL) {
         perf_debug("intel_texsubimage: temp bo creation failed: size = %u\n",
                    size);
         return NULL;
      }

      if (brw_bo_subdata(bo, 0, size, pixels + first_pixel)) {
         perf_debug("intel_texsubimage: temp bo upload failed\n");
         brw_bo_unreference(bo);
         return NULL;
      }

      *offset_out = 0;
      return bo;
   }
}

/* Consider all the restrictions and determine the format of the source. */
static mesa_format
blorp_get_client_format(struct brw_context *brw,
                        GLenum format, GLenum type,
                        const struct gl_pixelstore_attrib *packing)
{
   if (brw->ctx._ImageTransferState)
      return MESA_FORMAT_NONE;

   if (packing->SwapBytes || packing->LsbFirst || packing->Invert) {
      perf_debug("intel_texsubimage_blorp: unsupported gl_pixelstore_attrib\n");
      return MESA_FORMAT_NONE;
   }

   if (format != GL_RED &&
       format != GL_RG &&
       format != GL_RGB &&
       format != GL_BGR &&
       format != GL_RGBA &&
       format != GL_BGRA &&
       format != GL_ALPHA &&
       format != GL_RED_INTEGER &&
       format != GL_RG_INTEGER &&
       format != GL_RGB_INTEGER &&
       format != GL_BGR_INTEGER &&
       format != GL_RGBA_INTEGER &&
       format != GL_BGRA_INTEGER) {
      perf_debug("intel_texsubimage_blorp: %s not supported",
                 _mesa_enum_to_string(format));
      return MESA_FORMAT_NONE;
   }

   return _mesa_tex_format_from_format_and_type(&brw->ctx, format, type);
}

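/* blorp_blit cannot convert between signed and unsigned integer formats,
 * so detect that combination up front.
 */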
static bool
need_signed_unsigned_int_conversion(mesa_format src_format,
                                    mesa_format dst_format)
{
   const GLenum src_type = _mesa_get_format_datatype(src_format);
   const GLenum dst_type = _mesa_get_format_datatype(dst_format);
   return (src_type == GL_INT && dst_type == GL_UNSIGNED_INT) ||
          (src_type == GL_UNSIGNED_INT && dst_type == GL_INT);
}

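/* Upload client pixel data (glTexSubImage and friends) into a miptree by
 * wrapping the source BO in a temporary linear miptree and blitting or
 * copying from it one slice at a time. Returns false to request a fallback.
 */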
bool
brw_blorp_upload_miptree(struct brw_context *brw,
                         struct intel_mipmap_tree *dst_mt,
                         mesa_format dst_format,
                         uint32_t level, uint32_t x, uint32_t y, uint32_t z,
                         uint32_t width, uint32_t height, uint32_t depth,
                         GLenum target, GLenum format, GLenum type,
                         const void *pixels,
                         const struct gl_pixelstore_attrib *packing)
{
   const mesa_format src_format =
      blorp_get_client_format(brw, format, type, packing);
   if (src_format == MESA_FORMAT_NONE)
      return false;

   if (!brw->mesa_format_supports_render[dst_format]) {
      perf_debug("intel_texsubimage: can't use %s as render target\n",
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* This function relies on blorp_blit to upload the pixel data to the
    * miptree. But, blorp_blit doesn't support signed to unsigned or
    * unsigned to signed integer conversions.
    */
   if (need_signed_unsigned_int_conversion(src_format, dst_format))
      return false;

   uint32_t src_offset, src_row_stride, src_image_stride;
   struct brw_bo *src_bo =
      blorp_get_client_bo(brw, width, height, depth,
                          target, format, type, pixels, packing,
                          &src_offset, &src_row_stride,
                          &src_image_stride, true);
   if (src_bo == NULL)
      return false;

   /* Now that the source is offset to the correct starting point, adjust
    * the given dimensions to treat 1D arrays as 2D.
    */
   if (target == GL_TEXTURE_1D_ARRAY) {
      assert(depth == 1);
      assert(z == 0);
      depth = height;
      height = 1;
      z = y;
      y = 0;
      src_image_stride = src_row_stride;
   }

   intel_miptree_check_level_layer(dst_mt, level, z + depth - 1);

   bool result = false;

   /* Blit slice-by-slice, creating a single-slice miptree for each layer.
    * Even in the case of linear buffers, the hardware wants image arrays to
    * be aligned to four rows. This way the hardware only gets one image at
    * a time and any source alignment will do.
    */
   for (unsigned i = 0; i < depth; ++i) {
      struct intel_mipmap_tree *src_mt = intel_miptree_create_for_bo(
         brw, src_bo, src_format,
         src_offset + i * src_image_stride,
         width, height, 1,
         src_row_stride,
         ISL_TILING_LINEAR, 0);

      if (!src_mt) {
         perf_debug("intel_texsubimage: miptree creation for src failed\n");
         goto err;
      }

      /* When an exact match is needed, copy using equivalent UINT formats,
       * which prevents the hardware from changing the representation of
       * SNORM -1.
       */
      if (src_mt->format == dst_format) {
         brw_blorp_copy_miptrees(brw, src_mt, 0, 0,
                                 dst_mt, level, z + i,
                                 0, 0, x, y, width, height);
      } else {
         brw_blorp_blit_miptrees(brw, src_mt, 0, 0,
                                 src_format, SWIZZLE_XYZW,
                                 dst_mt, level, z + i,
                                 dst_format,
                                 0, 0, width, height,
                                 x, y, x + width, y + height,
                                 GL_NEAREST, false, false, false, false);
      }

      intel_miptree_release(&src_mt);
   }

   result = true;

err:
   brw_bo_unreference(src_bo);

   return result;
}

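/* Download pixel data from a miptree into a client PBO (glReadPixels and
 * glGetTexImage) by wrapping the destination BO in a temporary linear
 * miptree and blitting or copying into it one slice at a time. Returns
 * false to request a fallback.
 */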
bool
brw_blorp_download_miptree(struct brw_context *brw,
                           struct intel_mipmap_tree *src_mt,
                           mesa_format src_format, uint32_t src_swizzle,
                           uint32_t level, uint32_t x, uint32_t y, uint32_t z,
                           uint32_t width, uint32_t height, uint32_t depth,
                           GLenum target, GLenum format, GLenum type,
                           bool y_flip, const void *pixels,
                           const struct gl_pixelstore_attrib *packing)
{
   const mesa_format dst_format =
      blorp_get_client_format(brw, format, type, packing);
   if (dst_format == MESA_FORMAT_NONE)
      return false;

   if (!brw->mesa_format_supports_render[dst_format]) {
      perf_debug("intel_texsubimage: can't use %s as render target\n",
                 _mesa_get_format_name(dst_format));
      return false;
   }

   /* This function relies on blorp_blit to download the pixel data from the
    * miptree. But, blorp_blit doesn't support signed to unsigned or unsigned
    * to signed integer conversions.
    */
   if (need_signed_unsigned_int_conversion(src_format, dst_format))
      return false;

   /* We can't fetch from LUMINANCE or INTENSITY as that would require a
    * non-trivial swizzle.
    */
   switch (_mesa_get_format_base_format(src_format)) {
   case GL_LUMINANCE:
   case GL_LUMINANCE_ALPHA:
   case GL_INTENSITY:
      return false;
   default:
      break;
   }

   /* This pass only works for PBOs */
   assert(_mesa_is_bufferobj(packing->BufferObj));

   uint32_t dst_offset, dst_row_stride, dst_image_stride;
   struct brw_bo *dst_bo =
      blorp_get_client_bo(brw, width, height, depth,
                          target, format, type, pixels, packing,
                          &dst_offset, &dst_row_stride,
                          &dst_image_stride, false);
   if (dst_bo == NULL)
      return false;

   /* Now that the destination is offset to the correct starting point,
    * adjust the given dimensions to treat 1D arrays as 2D.
    */
   if (target == GL_TEXTURE_1D_ARRAY) {
      assert(depth == 1);
      assert(z == 0);
      depth = height;
      height = 1;
      z = y;
      y = 0;
      dst_image_stride = dst_row_stride;
   }

   intel_miptree_check_level_layer(src_mt, level, z + depth - 1);

   int y0 = y;
   int y1 = y + height;
   if (y_flip) {
      apply_y_flip(&y0, &y1, minify(src_mt->surf.phys_level0_sa.height,
                                    level - src_mt->first_level));
   }

   bool result = false;

   /* Blit slice-by-slice, creating a single-slice miptree for each layer.
    * Even in the case of linear buffers, the hardware wants image arrays to
    * be aligned to four rows. This way the hardware only gets one image at
    * a time and any destination alignment will do.
    */
   for (unsigned i = 0; i < depth; ++i) {
      struct intel_mipmap_tree *dst_mt = intel_miptree_create_for_bo(
         brw, dst_bo, dst_format,
         dst_offset + i * dst_image_stride,
         width, height, 1,
         dst_row_stride,
         ISL_TILING_LINEAR, 0);

      if (!dst_mt) {
         perf_debug("intel_texsubimage: miptree creation for dst failed\n");
         goto err;
      }

      /* When an exact match is needed, copy using equivalent UINT formats,
       * which prevents the hardware from changing the representation of
       * SNORM -1.
       */
      if (dst_mt->format == src_format && !y_flip &&
          src_swizzle == SWIZZLE_XYZW) {
         brw_blorp_copy_miptrees(brw, src_mt, level, z + i,
                                 dst_mt, 0, 0,
                                 x, y, 0, 0, width, height);
      } else {
         brw_blorp_blit_miptrees(brw, src_mt, level, z + i,
                                 src_format, src_swizzle,
                                 dst_mt, 0, 0, dst_format,
                                 x, y0, x + width, y1,
                                 0, 0, width, height,
                                 GL_NEAREST, false, y_flip, false, false);
      }

      intel_miptree_release(&dst_mt);
   }

   result = true;

   /* We implement PBO transfers by binding the user-provided BO as a fake
    * framebuffer and rendering to it. This breaks the invariant of the GL
    * that nothing is able to render to a BO, causing nondeterministic
    * corruption issues because the render cache is not coherent with a
    * number of other caches that the BO could potentially be bound to
    * afterwards.
    *
    * This could be solved in the same way that we guarantee texture
    * coherency after a texture is attached to a framebuffer and
    * rendered to, but that would involve checking *all* BOs bound to
    * the pipeline for the case we need to emit a cache flush due to
    * previous rendering to any of them -- including vertex, index,
    * uniform, atomic counter, shader image, transform feedback,
    * indirect draw buffers, etc.
    *
    * That would increase the per-draw call overhead even though it's
    * very unlikely that any of the BOs bound to the pipeline has been
    * rendered to via a PBO at any point, so it seems better to just
    * flush here unconditionally.
    */
   brw_emit_mi_flush(brw);

err:
   brw_bo_unreference(dst_bo);

   return result;
}

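/* Derive the per-channel color write disables from the GL color mask.
 * Returns true if any channel is disabled, in which case a fast clear
 * cannot be used.
 */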
static bool
set_write_disables(const struct intel_renderbuffer *irb,
                   const unsigned color_mask, bool *color_write_disable)
{
   /* Format information in the renderbuffer represents the requirements
    * given by the client. There are cases where the backing miptree uses,
    * for example, RGBA to represent RGBX. Since the client is only expecting
    * RGB we can treat alpha as not used and write whatever we like into it.
    */
   const GLenum base_format = irb->Base.Base._BaseFormat;
   const int components = _mesa_base_format_component_count(base_format);
   bool disables = false;

   assert(components > 0);

   for (int i = 0; i < components; i++) {
      color_write_disable[i] = !(color_mask & (1 << i));
      disables = disables || color_write_disable[i];
   }

   return disables;
}

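/* Clear a single color renderbuffer with blorp, using a fast (MCS/CCS)
 * clear when all the requirements are met and a regular slow clear
 * otherwise.
 */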
static void
do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
                      struct gl_renderbuffer *rb, unsigned buf,
                      bool partial_clear, bool encode_srgb)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   uint32_t x0, x1, y0, y1;

   mesa_format format = irb->Base.Base.Format;
   if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
      format = _mesa_get_srgb_format_linear(format);
   enum isl_format isl_format = brw->mesa_to_isl_render_format[format];

   x0 = fb->_Xmin;
   x1 = fb->_Xmax;
   if (rb->Name != 0) {
      y0 = fb->_Ymin;
      y1 = fb->_Ymax;
   } else {
      y0 = rb->Height - fb->_Ymax;
      y1 = rb->Height - fb->_Ymin;
   }

   /* If the clear region is empty, just return. */
   if (x0 == x1 || y0 == y1)
      return;

   bool can_fast_clear = !partial_clear;

   bool color_write_disable[4] = { false, false, false, false };
   if (set_write_disables(irb, GET_COLORMASK(ctx->Color.ColorMask, buf),
                          color_write_disable))
      can_fast_clear = false;

   /* We store clear colors as floats or uints as needed. If there are
    * texture views in play, the formats will not properly be respected
    * during resolves because the resolve operations only know about the
    * miptree and not the renderbuffer.
    */
   if (irb->Base.Base.Format != irb->mt->format)
      can_fast_clear = false;

   if (!irb->mt->supports_fast_clear ||
       !brw_is_color_fast_clear_compatible(brw, irb->mt, &ctx->Color.ClearColor))
      can_fast_clear = false;

   /* Surface state can only record one fast clear color value. Therefore
    * unless different levels/layers agree on the color it can be used to
    * represent only a single level/layer. Here it will be reserved for the
    * first slice (level 0, layer 0).
    */
   if (irb->layer_count > 1 || irb->mt_level || irb->mt_layer)
      can_fast_clear = false;

   unsigned level = irb->mt_level;
   const unsigned num_layers = fb->MaxNumLayers ? irb->layer_count : 1;

   /* If the MCS buffer hasn't been allocated yet, we need to allocate it
    * now.
    */
   if (can_fast_clear && !irb->mt->mcs_buf) {
      assert(irb->mt->aux_usage == ISL_AUX_USAGE_CCS_D);
      if (!intel_miptree_alloc_ccs(brw, irb->mt)) {
         /* There are a few reasons in addition to out-of-memory that can
          * cause intel_miptree_alloc_ccs to fail. Try to recover by falling
          * back to a non-fast clear.
          */
         can_fast_clear = false;
      }
   }

   /* FINISHME: Debug and enable fast clears */
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   if (devinfo->gen >= 11)
      can_fast_clear = false;

   if (can_fast_clear) {
      const enum isl_aux_state aux_state =
         intel_miptree_get_aux_state(irb->mt, irb->mt_level, irb->mt_layer);
      union isl_color_value clear_color =
         brw_meta_convert_fast_clear_color(brw, irb->mt,
                                           &ctx->Color.ClearColor);

      bool same_clear_color =
         !intel_miptree_set_clear_color(brw, irb->mt, clear_color);

      /* If the buffer is already in INTEL_FAST_CLEAR_STATE_CLEAR, the clear
       * is redundant and can be skipped.
       */
      if (aux_state == ISL_AUX_STATE_CLEAR && same_clear_color)
         return;

      DBG("%s (fast) to mt %p level %d layers %d+%d\n", __FUNCTION__,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

      /* We can't setup the blorp_surf until we've allocated the MCS above */
      struct isl_surf isl_tmp[2];
      struct blorp_surf surf;
      blorp_surf_for_miptree(brw, &surf, irb->mt, irb->mt->aux_usage, true,
                             &level, irb->mt_layer, num_layers, isl_tmp);

      /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
       *
       *    "Any transition from any value in {Clear, Render, Resolve} to a
       *    different value in {Clear, Render, Resolve} requires end of pipe
       *    synchronization."
       *
       * In other words, fast clear ops are not properly synchronized with
       * other drawing. We need to use a PIPE_CONTROL to ensure that the
       * contents of the previous draw hit the render target before we resolve
       * and again afterwards to ensure that the resolve is complete before we
       * do any more regular drawing.
       */
      brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
      blorp_fast_clear(&batch, &surf, isl_format,
                       level, irb->mt_layer, num_layers,
                       x0, y0, x1, y1);
      blorp_batch_finish(&batch);

      brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

      /* Now that the fast clear has occurred, put the buffer in
       * INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
       * redundant clears.
       */
      intel_miptree_set_aux_state(brw, irb->mt, irb->mt_level,
                                  irb->mt_layer, num_layers,
                                  ISL_AUX_STATE_CLEAR);
   } else {
      DBG("%s (slow) to mt %p level %d layer %d+%d\n", __FUNCTION__,
          irb->mt, irb->mt_level, irb->mt_layer, num_layers);

      enum isl_aux_usage aux_usage =
         intel_miptree_render_aux_usage(brw, irb->mt, isl_format,
                                        false, false);
      intel_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer,
                                   num_layers, aux_usage);

      struct isl_surf isl_tmp[2];
      struct blorp_surf surf;
      blorp_surf_for_miptree(brw, &surf, irb->mt, aux_usage, true,
                             &level, irb->mt_layer, num_layers, isl_tmp);

      union isl_color_value clear_color;
      memcpy(clear_color.f32, ctx->Color.ClearColor.f, sizeof(float) * 4);

      struct blorp_batch batch;
      blorp_batch_init(&brw->blorp, &batch, brw, 0);
      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  level, irb->mt_layer, num_layers,
                  x0, y0, x1, y1,
                  clear_color, color_write_disable);
      blorp_batch_finish(&batch);

      intel_miptree_finish_render(brw, irb->mt, level, irb->mt_layer,
                                  num_layers, aux_usage);
   }
}

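/* Clear the color buffers selected by mask; each enabled draw buffer is
 * cleared individually with do_single_blorp_clear().
 */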
void
brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
                      GLbitfield mask, bool partial_clear, bool encode_srgb)
{
   for (unsigned buf = 0; buf < fb->_NumColorDrawBuffers; buf++) {
      struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[buf];
      struct intel_renderbuffer *irb = intel_renderbuffer(rb);

      /* Only clear the buffers present in the provided mask */
      if (((1 << fb->_ColorDrawBufferIndexes[buf]) & mask) == 0)
         continue;

      /* If this is an ES2 context or GL_ARB_ES2_compatibility is supported,
       * the framebuffer can be complete with some attachments missing. In
       * this case the _ColorDrawBuffers pointer will be NULL.
       */
      if (rb == NULL)
         continue;

      do_single_blorp_clear(brw, fb, rb, buf, partial_clear, encode_srgb);
      irb->need_downsample = true;
   }
}

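/* Clear the depth and/or stencil buffers with blorp. Buffers that are
 * masked off, missing, or fully write-protected are skipped.
 */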
void
brw_blorp_clear_depth_stencil(struct brw_context *brw,
                              struct gl_framebuffer *fb,
                              GLbitfield mask, bool partial_clear)
{
   const struct gl_context *ctx = &brw->ctx;
   struct gl_renderbuffer *depth_rb =
      fb->Attachment[BUFFER_DEPTH].Renderbuffer;
   struct gl_renderbuffer *stencil_rb =
      fb->Attachment[BUFFER_STENCIL].Renderbuffer;

   if (!depth_rb || ctx->Depth.Mask == GL_FALSE)
      mask &= ~BUFFER_BIT_DEPTH;

   if (!stencil_rb || (ctx->Stencil.WriteMask[0] & 0xff) == 0)
      mask &= ~BUFFER_BIT_STENCIL;

   if (!(mask & (BUFFER_BITS_DEPTH_STENCIL)))
      return;

   uint32_t x0, x1, y0, y1, rb_name, rb_height;
   if (depth_rb) {
      rb_name = depth_rb->Name;
      rb_height = depth_rb->Height;
      if (stencil_rb) {
         assert(depth_rb->Width == stencil_rb->Width);
         assert(depth_rb->Height == stencil_rb->Height);
      }
   } else {
      assert(stencil_rb);
      rb_name = stencil_rb->Name;
      rb_height = stencil_rb->Height;
   }

   x0 = fb->_Xmin;
   x1 = fb->_Xmax;
   if (rb_name != 0) {
      y0 = fb->_Ymin;
      y1 = fb->_Ymax;
   } else {
      y0 = rb_height - fb->_Ymax;
      y1 = rb_height - fb->_Ymin;
   }

   /* If the clear region is empty, just return. */
   if (x0 == x1 || y0 == y1)
      return;

   uint32_t level, start_layer, num_layers;
   struct isl_surf isl_tmp[4];
   struct blorp_surf depth_surf, stencil_surf;

   struct intel_mipmap_tree *depth_mt = NULL;
   if (mask & BUFFER_BIT_DEPTH) {
      struct intel_renderbuffer *irb = intel_renderbuffer(depth_rb);
      depth_mt = find_miptree(GL_DEPTH_BUFFER_BIT, irb);

      level = irb->mt_level;
      start_layer = irb->mt_layer;
      num_layers = fb->MaxNumLayers ? irb->layer_count : 1;

      intel_miptree_prepare_depth(brw, depth_mt, level,
                                  start_layer, num_layers);

      unsigned depth_level = level;
      blorp_surf_for_miptree(brw, &depth_surf, depth_mt, depth_mt->aux_usage,
                             true, &depth_level, start_layer, num_layers,
                             &isl_tmp[0]);
      assert(depth_level == level);
   }

   uint8_t stencil_mask = 0;
   struct intel_mipmap_tree *stencil_mt = NULL;
   if (mask & BUFFER_BIT_STENCIL) {
      struct intel_renderbuffer *irb = intel_renderbuffer(stencil_rb);
      stencil_mt = find_miptree(GL_STENCIL_BUFFER_BIT, irb);

      if (mask & BUFFER_BIT_DEPTH) {
         assert(level == irb->mt_level);
         assert(start_layer == irb->mt_layer);
         assert(num_layers == (fb->MaxNumLayers ? irb->layer_count : 1));
      } else {
         level = irb->mt_level;
         start_layer = irb->mt_layer;
      }
      num_layers = fb->MaxNumLayers ? irb->layer_count : 1;

      stencil_mask = ctx->Stencil.WriteMask[0] & 0xff;

      intel_miptree_prepare_access(brw, stencil_mt, level, 1,
                                   start_layer, num_layers,
                                   ISL_AUX_USAGE_NONE, false);

      unsigned stencil_level = level;
      blorp_surf_for_miptree(brw, &stencil_surf, stencil_mt,
                             ISL_AUX_USAGE_NONE, true,
                             &stencil_level, start_layer, num_layers,
                             &isl_tmp[2]);
   }

   assert((mask & BUFFER_BIT_DEPTH) || stencil_mask);

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_clear_depth_stencil(&batch, &depth_surf, &stencil_surf,
                             level, start_layer, num_layers,
                             x0, y0, x1, y1,
                             (mask & BUFFER_BIT_DEPTH), ctx->Depth.Clear,
                             stencil_mask, ctx->Stencil.Clear);
   blorp_batch_finish(&batch);

   if (mask & BUFFER_BIT_DEPTH) {
      intel_miptree_finish_depth(brw, depth_mt, level,
                                 start_layer, num_layers, true);
   }

   if (stencil_mask) {
      intel_miptree_finish_write(brw, stencil_mt, level,
                                 start_layer, num_layers,
                                 ISL_AUX_USAGE_NONE);
   }
}

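/* Perform a CCS resolve (full or partial, per resolve_op) on a single
 * slice of a color miptree so its contents are valid without the
 * auxiliary buffer.
 */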
void
brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
                        unsigned level, unsigned layer,
                        enum isl_aux_op resolve_op)
{
   DBG("%s to mt %p level %u layer %u\n", __FUNCTION__, mt, level, layer);

   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);

   struct isl_surf isl_tmp[1];
   struct blorp_surf surf;
   blorp_surf_for_miptree(brw, &surf, mt, mt->aux_usage, true,
                          &level, layer, 1 /* num_layers */,
                          isl_tmp);

   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
    *
    *    "Any transition from any value in {Clear, Render, Resolve} to a
    *    different value in {Clear, Render, Resolve} requires end of pipe
    *    synchronization."
    *
    * In other words, fast clear ops are not properly synchronized with
    * other drawing. We need to use a PIPE_CONTROL to ensure that the
    * contents of the previous draw hit the render target before we resolve
    * and again afterwards to ensure that the resolve is complete before we
    * do any more regular drawing.
    */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_ccs_resolve(&batch, &surf, level, layer, 1,
                     brw_blorp_to_isl_format(brw, format, true),
                     resolve_op);
   blorp_batch_finish(&batch);

   /* See comment above */
   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
}

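/* Perform an MCS partial resolve on the given layer range, folding the
 * recorded fast clear color into the MCS so the surface contents no longer
 * depend on it.
 */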
void
brw_blorp_mcs_partial_resolve(struct brw_context *brw,
                              struct intel_mipmap_tree *mt,
                              uint32_t start_layer, uint32_t num_layers)
{
   DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
       start_layer, start_layer + num_layers - 1);

   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
   enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);

   struct isl_surf isl_tmp[1];
   struct blorp_surf surf;
   uint32_t level = 0;
   blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_MCS, true,
                          &level, start_layer, num_layers, isl_tmp);

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_mcs_partial_resolve(&batch, &surf, isl_format,
                             start_layer, num_layers);
   blorp_batch_finish(&batch);
}

/**
 * Perform a HiZ or depth resolve operation.
 *
 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
 * PRM, Volume 1, Part 2:
 *   - 7.5.3.1 Depth Buffer Clear
 *   - 7.5.3.2 Depth Buffer Resolve
 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
void
intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
               unsigned int level, unsigned int start_layer,
               unsigned int num_layers, enum isl_aux_op op)
{
   assert(intel_miptree_level_has_hiz(mt, level));
   assert(op != ISL_AUX_OP_NONE);
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   const char *opname = NULL;

   switch (op) {
   case ISL_AUX_OP_FULL_RESOLVE:
      opname = "depth resolve";
      break;
   case ISL_AUX_OP_AMBIGUATE:
      opname = "hiz ambiguate";
      break;
   case ISL_AUX_OP_FAST_CLEAR:
      opname = "depth clear";
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
   case ISL_AUX_OP_NONE:
      unreachable("Invalid HiZ op");
   }

   DBG("%s %s to mt %p level %d layers %d-%d\n",
       __func__, opname, mt, level, start_layer, start_layer + num_layers - 1);

   /* The following stalls and flushes are only documented to be required for
    * HiZ clear operations. However, they also seem to be required for
    * resolve operations.
    */
   if (devinfo->gen == 6) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
       *
       *    "If other rendering operations have preceded this clear, a
       *    PIPE_CONTROL with write cache flush enabled and Z-inhibit
       *    disabled must be issued before the rectangle primitive used for
       *    the depth buffer clear operation."
       */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_CS_STALL);
   } else if (devinfo->gen >= 7) {
      /*
       * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
       *
       *   If other rendering operations have preceded this clear, a
       *   PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
       *   enabled must be issued before the rectangle primitive used for
       *   the depth buffer clear operation.
       *
       * Same applies for Gen8 and Gen9.
       *
       * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
       * PIPE_CONTROL, Depth Cache Flush Enable:
       *
       *   This bit must not be set when Depth Stall Enable bit is set in
       *   this packet.
       *
       * This is confirmed to hold for real; HSW gets immediate GPU hangs
       * otherwise.
       *
       * Therefore issue two pipe control flushes, one for cache flush and
       * another for depth stall.
       */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_CS_STALL);

      brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
   }

   assert(mt->aux_usage == ISL_AUX_USAGE_HIZ && mt->hiz_buf);

   struct isl_surf isl_tmp[2];
   struct blorp_surf surf;
   blorp_surf_for_miptree(brw, &surf, mt, ISL_AUX_USAGE_HIZ, true,
                          &level, start_layer, num_layers, isl_tmp);

   struct blorp_batch batch;
   blorp_batch_init(&brw->blorp, &batch, brw, 0);
   blorp_hiz_op(&batch, &surf, level, start_layer, num_layers, op);
   blorp_batch_finish(&batch);

   /* The following stalls and flushes are only documented to be required for
    * HiZ clear operations. However, they also seem to be required for
    * resolve operations.
    */
   if (devinfo->gen == 6) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *    "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
       *    followed by a PIPE_CONTROL command with DEPTH_STALL bit set
       *    and Then followed by Depth FLUSH'
       */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_DEPTH_STALL);

      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_CS_STALL);
   } else if (devinfo->gen >= 8) {
      /*
       * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
       *
       *   "Depth buffer clear pass using any of the methods (WM_STATE,
       *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
       *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
       *    "set" before starting to render. DepthStall and DepthFlush are
       *    not needed between consecutive depth clear passes nor is it
       *    required if the depth clear pass was done with
       *    'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
       *
       * TODO: As the spec says, this could be conditional.
       */
      brw_emit_pipe_control_flush(brw,
                                  PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                  PIPE_CONTROL_DEPTH_STALL);
   }
}