gallium/radeon: don't allocate HTILE in a separate buffer
src/gallium/drivers/radeonsi/si_blit.c
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_pipe.h"
#include "util/u_format.h"
#include "util/u_surface.h"

enum si_blitter_op /* bitmask */
{
	SI_SAVE_TEXTURES = 1,
	SI_SAVE_FRAMEBUFFER = 2,
	SI_SAVE_FRAGMENT_STATE = 4,
	SI_DISABLE_RENDER_COND = 8,

	SI_CLEAR = SI_SAVE_FRAGMENT_STATE,

	SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,

	SI_COPY = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
		  SI_SAVE_FRAGMENT_STATE | SI_DISABLE_RENDER_COND,

	SI_BLIT = SI_SAVE_FRAMEBUFFER | SI_SAVE_TEXTURES |
		  SI_SAVE_FRAGMENT_STATE,

	SI_DECOMPRESS = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE |
			SI_DISABLE_RENDER_COND,

	SI_COLOR_RESOLVE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE
};

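/* Save all states that u_blitter may clobber, depending on which
 * si_blitter_op bits are set, and optionally force render conditions off. */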
static void si_blitter_begin(struct pipe_context *ctx, enum si_blitter_op op)
{
	struct si_context *sctx = (struct si_context *)ctx;

	util_blitter_save_vertex_buffer_slot(sctx->blitter, sctx->vertex_buffer);
	util_blitter_save_vertex_elements(sctx->blitter, sctx->vertex_elements);
	util_blitter_save_vertex_shader(sctx->blitter, sctx->vs_shader.cso);
	util_blitter_save_tessctrl_shader(sctx->blitter, sctx->tcs_shader.cso);
	util_blitter_save_tesseval_shader(sctx->blitter, sctx->tes_shader.cso);
	util_blitter_save_geometry_shader(sctx->blitter, sctx->gs_shader.cso);
	util_blitter_save_so_targets(sctx->blitter, sctx->b.streamout.num_targets,
				     (struct pipe_stream_output_target**)sctx->b.streamout.targets);
	util_blitter_save_rasterizer(sctx->blitter, sctx->queued.named.rasterizer);

	if (op & SI_SAVE_FRAGMENT_STATE) {
		util_blitter_save_blend(sctx->blitter, sctx->queued.named.blend);
		util_blitter_save_depth_stencil_alpha(sctx->blitter, sctx->queued.named.dsa);
		util_blitter_save_stencil_ref(sctx->blitter, &sctx->stencil_ref.state);
		util_blitter_save_fragment_shader(sctx->blitter, sctx->ps_shader.cso);
		util_blitter_save_sample_mask(sctx->blitter, sctx->sample_mask.sample_mask);
		util_blitter_save_viewport(sctx->blitter, &sctx->b.viewports.states[0]);
		util_blitter_save_scissor(sctx->blitter, &sctx->b.scissors.states[0]);
	}

	if (op & SI_SAVE_FRAMEBUFFER)
		util_blitter_save_framebuffer(sctx->blitter, &sctx->framebuffer.state);

	if (op & SI_SAVE_TEXTURES) {
		util_blitter_save_fragment_sampler_states(
			sctx->blitter, 2,
			(void**)sctx->samplers[PIPE_SHADER_FRAGMENT].views.sampler_states);

		util_blitter_save_fragment_sampler_views(sctx->blitter, 2,
			sctx->samplers[PIPE_SHADER_FRAGMENT].views.views);
	}

	if (op & SI_DISABLE_RENDER_COND)
		sctx->b.render_cond_force_off = true;
}

static void si_blitter_end(struct pipe_context *ctx)
{
	struct si_context *sctx = (struct si_context *)ctx;

	sctx->b.render_cond_force_off = false;
}

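/* Return the index of the last sample of a resource (0 if single-sampled). */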
static unsigned u_max_sample(struct pipe_resource *r)
{
	return r->nr_samples ? r->nr_samples - 1 : 0;
}

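/* Copy the requested depth and/or stencil planes from src to dst using the
 * DB->CB copy path, one surface per (layer, sample) pair. Returns a bitmask
 * of the mipmap levels that were copied in their entirety. */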
static unsigned
si_blit_dbcb_copy(struct si_context *sctx,
		  struct r600_texture *src,
		  struct r600_texture *dst,
		  unsigned planes, unsigned level_mask,
		  unsigned first_layer, unsigned last_layer,
		  unsigned first_sample, unsigned last_sample)
{
	struct pipe_surface surf_tmpl = {{0}};
	unsigned layer, sample, checked_last_layer, max_layer;
	unsigned fully_copied_levels = 0;

	if (planes & PIPE_MASK_Z)
		sctx->dbcb_depth_copy_enabled = true;
	if (planes & PIPE_MASK_S)
		sctx->dbcb_stencil_copy_enabled = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
	sctx->decompression_enabled = true;
	sctx->framebuffer.do_update_surf_dirtiness = false;

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the fewer layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&src->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		surf_tmpl.u.tex.level = level;

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *zsurf, *cbsurf;

			surf_tmpl.format = src->resource.b.b.format;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &src->resource.b.b, &surf_tmpl);

			surf_tmpl.format = dst->resource.b.b.format;
			cbsurf = sctx->b.b.create_surface(&sctx->b.b, &dst->resource.b.b, &surf_tmpl);

			for (sample = first_sample; sample <= last_sample; sample++) {
				if (sample != sctx->dbcb_copy_sample) {
					sctx->dbcb_copy_sample = sample;
					si_mark_atom_dirty(sctx, &sctx->db_render_state);
				}

				si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
				util_blitter_custom_depth_stencil(sctx->blitter, zsurf, cbsurf, 1 << sample,
								  sctx->custom_dsa_flush, 1.0f);
				si_blitter_end(&sctx->b.b);
			}

			pipe_surface_reference(&zsurf, NULL);
			pipe_surface_reference(&cbsurf, NULL);
		}

		if (first_layer == 0 && last_layer >= max_layer &&
		    first_sample == 0 && last_sample >= u_max_sample(&src->resource.b.b))
			fully_copied_levels |= 1u << level;
	}

	sctx->decompression_enabled = false;
	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
	sctx->dbcb_depth_copy_enabled = false;
	sctx->dbcb_stencil_copy_enabled = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	return fully_copied_levels;
}

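/* Decompress depth/stencil by copying to a staging (flushed) texture; the
 * staging format determines which planes are copied. */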
static void si_blit_decompress_depth(struct pipe_context *ctx,
				     struct r600_texture *texture,
				     struct r600_texture *staging,
				     unsigned first_level, unsigned last_level,
				     unsigned first_layer, unsigned last_layer,
				     unsigned first_sample, unsigned last_sample)
{
	const struct util_format_description *desc;
	unsigned planes = 0;

	assert(staging != NULL && "use si_blit_decompress_zs_in_place instead");

	desc = util_format_description(staging->resource.b.b.format);

	if (util_format_has_depth(desc))
		planes |= PIPE_MASK_Z;
	if (util_format_has_stencil(desc))
		planes |= PIPE_MASK_S;

	si_blit_dbcb_copy(
		(struct si_context *)ctx, texture, staging, planes,
		u_bit_consecutive(first_level, last_level - first_level + 1),
		first_layer, last_layer, first_sample, last_sample);
}

/* Helper function for si_blit_decompress_zs_in_place. */
static void
si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
				      struct r600_texture *texture,
				      unsigned planes, unsigned level_mask,
				      unsigned first_layer, unsigned last_layer)
{
	struct pipe_surface *zsurf, surf_tmpl = {{0}};
	unsigned layer, max_layer, checked_last_layer;
	unsigned fully_decompressed_mask = 0;

	if (!level_mask)
		return;

	if (planes & PIPE_MASK_S)
		sctx->db_flush_stencil_inplace = true;
	if (planes & PIPE_MASK_Z)
		sctx->db_flush_depth_inplace = true;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);

	surf_tmpl.format = texture->resource.b.b.format;

	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
	sctx->decompression_enabled = true;
	sctx->framebuffer.do_update_surf_dirtiness = false;

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		surf_tmpl.u.tex.level = level;

		/* The smaller the mipmap level, the fewer layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&texture->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;

			zsurf = sctx->b.b.create_surface(&sctx->b.b, &texture->resource.b.b, &surf_tmpl);

			si_blitter_begin(&sctx->b.b, SI_DECOMPRESS);
			util_blitter_custom_depth_stencil(sctx->blitter, zsurf, NULL, ~0,
							  sctx->custom_dsa_flush,
							  1.0f);
			si_blitter_end(&sctx->b.b);

			pipe_surface_reference(&zsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer >= max_layer) {
			fully_decompressed_mask |= 1u << level;
		}
	}

	if (planes & PIPE_MASK_Z)
		texture->dirty_level_mask &= ~fully_decompressed_mask;
	if (planes & PIPE_MASK_S)
		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

	sctx->decompression_enabled = false;
	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
	sctx->db_flush_depth_inplace = false;
	sctx->db_flush_stencil_inplace = false;
	si_mark_atom_dirty(sctx, &sctx->db_render_state);
}

/* Helper for si_decompress_depth: decompress the given levels of Z and/or S
 * planes in place.
 */
static void
si_blit_decompress_zs_in_place(struct si_context *sctx,
			       struct r600_texture *texture,
			       unsigned levels_z, unsigned levels_s,
			       unsigned first_layer, unsigned last_layer)
{
	unsigned both = levels_z & levels_s;

	/* First, do combined Z & S decompresses for levels that need it. */
	if (both) {
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_Z | PIPE_MASK_S,
				both,
				first_layer, last_layer);
		levels_z &= ~both;
		levels_s &= ~both;
	}

	/* Now do separate Z and S decompresses. */
	if (levels_z) {
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_Z,
				levels_z,
				first_layer, last_layer);
	}

	if (levels_s) {
		si_blit_decompress_zs_planes_in_place(
				sctx, texture, PIPE_MASK_S,
				levels_s,
				first_layer, last_layer);
	}
}

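/* Decompress the dirty levels of the required Z/S planes, either in place
 * (when the compressed format can be sampled directly) or by copying to a
 * separate flushed depth texture. */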
static void
si_decompress_depth(struct si_context *sctx,
		    struct r600_texture *tex,
		    unsigned required_planes,
		    unsigned first_level, unsigned last_level,
		    unsigned first_layer, unsigned last_layer)
{
	unsigned inplace_planes = 0;
	unsigned copy_planes = 0;
	unsigned level_mask = u_bit_consecutive(first_level, last_level - first_level + 1);
	unsigned levels_z = 0;
	unsigned levels_s = 0;

	if (required_planes & PIPE_MASK_Z) {
		levels_z = level_mask & tex->dirty_level_mask;

		if (levels_z) {
			if (r600_can_sample_zs(tex, false))
				inplace_planes |= PIPE_MASK_Z;
			else
				copy_planes |= PIPE_MASK_Z;
		}
	}
	if (required_planes & PIPE_MASK_S) {
		levels_s = level_mask & tex->stencil_dirty_level_mask;

		if (levels_s) {
			if (r600_can_sample_zs(tex, true))
				inplace_planes |= PIPE_MASK_S;
			else
				copy_planes |= PIPE_MASK_S;
		}
	}

	assert(!tex->tc_compatible_htile || levels_z == 0);
	assert(!tex->tc_compatible_htile || levels_s == 0 ||
	       !r600_can_sample_zs(tex, true));

	/* We may have to allocate the flushed texture here when called from
	 * si_decompress_subresource.
	 */
	if (copy_planes &&
	    (tex->flushed_depth_texture ||
	     r600_init_flushed_depth_texture(&sctx->b.b, &tex->resource.b.b, NULL))) {
		struct r600_texture *dst = tex->flushed_depth_texture;
		unsigned fully_copied_levels;
		unsigned levels = 0;

		assert(tex->flushed_depth_texture);

		if (util_format_is_depth_and_stencil(dst->resource.b.b.format))
			copy_planes = PIPE_MASK_Z | PIPE_MASK_S;

		if (copy_planes & PIPE_MASK_Z) {
			levels |= levels_z;
			levels_z = 0;
		}
		if (copy_planes & PIPE_MASK_S) {
			levels |= levels_s;
			levels_s = 0;
		}

		fully_copied_levels = si_blit_dbcb_copy(
			sctx, tex, dst, copy_planes, levels,
			first_layer, last_layer,
			0, u_max_sample(&tex->resource.b.b));

		if (copy_planes & PIPE_MASK_Z)
			tex->dirty_level_mask &= ~fully_copied_levels;
		if (copy_planes & PIPE_MASK_S)
			tex->stencil_dirty_level_mask &= ~fully_copied_levels;
	}

	if (inplace_planes) {
		si_blit_decompress_zs_in_place(
			sctx, tex,
			levels_z, levels_s,
			first_layer, last_layer);
	}
}

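/* Decompress every sampler-bound depth texture flagged in
 * needs_depth_decompress_mask. */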
static void
si_decompress_sampler_depth_textures(struct si_context *sctx,
				     struct si_textures_info *textures)
{
	unsigned i;
	unsigned mask = textures->needs_depth_decompress_mask;

	while (mask) {
		struct pipe_sampler_view *view;
		struct si_sampler_view *sview;
		struct r600_texture *tex;

		i = u_bit_scan(&mask);

		view = textures->views.views[i];
		assert(view);
		sview = (struct si_sampler_view*)view;

		tex = (struct r600_texture *)view->texture;
		assert(tex->db_compatible);

		si_decompress_depth(sctx, tex,
				    sview->is_stencil_sampler ? PIPE_MASK_S : PIPE_MASK_Z,
				    view->u.tex.first_level, view->u.tex.last_level,
				    0, util_max_layer(&tex->resource.b.b, view->u.tex.first_level));
	}
}

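/* Decompress color by drawing with one of the custom blend states:
 * DCC decompression, FMASK decompression, or fast-clear elimination. */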
static void si_blit_decompress_color(struct pipe_context *ctx,
				     struct r600_texture *rtex,
				     unsigned first_level, unsigned last_level,
				     unsigned first_layer, unsigned last_layer,
				     bool need_dcc_decompress)
{
	struct si_context *sctx = (struct si_context *)ctx;
	void* custom_blend;
	unsigned layer, checked_last_layer, max_layer;
	unsigned level_mask =
		u_bit_consecutive(first_level, last_level - first_level + 1);

	if (!need_dcc_decompress)
		level_mask &= rtex->dirty_level_mask;
	if (!level_mask)
		return;

	if (rtex->dcc_offset && need_dcc_decompress) {
		custom_blend = sctx->custom_blend_dcc_decompress;

		/* disable levels without DCC */
		for (int i = first_level; i <= last_level; i++) {
			if (!vi_dcc_enabled(rtex, i))
				level_mask &= ~(1 << i);
		}
	} else if (rtex->fmask.size) {
		custom_blend = sctx->custom_blend_fmask_decompress;
	} else {
		custom_blend = sctx->custom_blend_eliminate_fastclear;
	}

	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
	sctx->decompression_enabled = true;
	sctx->framebuffer.do_update_surf_dirtiness = false;

	while (level_mask) {
		unsigned level = u_bit_scan(&level_mask);

		/* The smaller the mipmap level, the fewer layers there are
		 * as far as 3D textures are concerned. */
		max_layer = util_max_layer(&rtex->resource.b.b, level);
		checked_last_layer = MIN2(last_layer, max_layer);

		for (layer = first_layer; layer <= checked_last_layer; layer++) {
			struct pipe_surface *cbsurf, surf_tmpl;

			surf_tmpl.format = rtex->resource.b.b.format;
			surf_tmpl.u.tex.level = level;
			surf_tmpl.u.tex.first_layer = layer;
			surf_tmpl.u.tex.last_layer = layer;
			cbsurf = ctx->create_surface(ctx, &rtex->resource.b.b, &surf_tmpl);

			si_blitter_begin(ctx, SI_DECOMPRESS);
			util_blitter_custom_color(sctx->blitter, cbsurf, custom_blend);
			si_blitter_end(ctx);

			pipe_surface_reference(&cbsurf, NULL);
		}

		/* The texture will always be dirty if some layers aren't flushed.
		 * I don't think this case occurs often though. */
		if (first_layer == 0 && last_layer >= max_layer) {
			rtex->dirty_level_mask &= ~(1 << level);
		}
	}

	sctx->decompression_enabled = false;
	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
}

static void
si_decompress_color_texture(struct si_context *sctx, struct r600_texture *tex,
			    unsigned first_level, unsigned last_level)
{
	/* CMASK or DCC can be discarded and we can still end up here. */
	if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
		return;

	si_blit_decompress_color(&sctx->b.b, tex, first_level, last_level, 0,
				 util_max_layer(&tex->resource.b.b, first_level),
				 false);
}

static void
si_decompress_sampler_color_textures(struct si_context *sctx,
				     struct si_textures_info *textures)
{
	unsigned i;
	unsigned mask = textures->needs_color_decompress_mask;

	while (mask) {
		struct pipe_sampler_view *view;
		struct r600_texture *tex;

		i = u_bit_scan(&mask);

		view = textures->views.views[i];
		assert(view);

		tex = (struct r600_texture *)view->texture;

		si_decompress_color_texture(sctx, tex, view->u.tex.first_level,
					    view->u.tex.last_level);
	}
}

static void
si_decompress_image_color_textures(struct si_context *sctx,
				   struct si_images_info *images)
{
	unsigned i;
	unsigned mask = images->needs_color_decompress_mask;

	while (mask) {
		const struct pipe_image_view *view;
		struct r600_texture *tex;

		i = u_bit_scan(&mask);

		view = &images->views[i];
		assert(view->resource->target != PIPE_BUFFER);

		tex = (struct r600_texture *)view->resource;

		si_decompress_color_texture(sctx, tex, view->u.tex.level,
					    view->u.tex.level);
	}
}

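/* Disable DCC on a texture that is bound both for sampling/image access and
 * as one of the currently bound color buffers (a render feedback loop). */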
static void si_check_render_feedback_texture(struct si_context *sctx,
					     struct r600_texture *tex,
					     unsigned first_level,
					     unsigned last_level,
					     unsigned first_layer,
					     unsigned last_layer)
{
	bool render_feedback = false;

	if (!tex->dcc_offset)
		return;

	for (unsigned j = 0; j < sctx->framebuffer.state.nr_cbufs; ++j) {
		struct r600_surface *surf;

		if (!sctx->framebuffer.state.cbufs[j])
			continue;

		surf = (struct r600_surface*)sctx->framebuffer.state.cbufs[j];

		if (tex == (struct r600_texture *)surf->base.texture &&
		    surf->base.u.tex.level >= first_level &&
		    surf->base.u.tex.level <= last_level &&
		    surf->base.u.tex.first_layer <= last_layer &&
		    surf->base.u.tex.last_layer >= first_layer) {
			render_feedback = true;
			break;
		}
	}

	if (render_feedback)
		r600_texture_disable_dcc(&sctx->b, tex);
}

static void si_check_render_feedback_textures(struct si_context *sctx,
					       struct si_textures_info *textures)
{
	uint32_t mask = textures->views.enabled_mask;

	while (mask) {
		const struct pipe_sampler_view *view;
		struct r600_texture *tex;

		unsigned i = u_bit_scan(&mask);

		view = textures->views.views[i];
		if (view->texture->target == PIPE_BUFFER)
			continue;

		tex = (struct r600_texture *)view->texture;

		si_check_render_feedback_texture(sctx, tex,
						 view->u.tex.first_level,
						 view->u.tex.last_level,
						 view->u.tex.first_layer,
						 view->u.tex.last_layer);
	}
}

static void si_check_render_feedback_images(struct si_context *sctx,
					    struct si_images_info *images)
{
	uint32_t mask = images->enabled_mask;

	while (mask) {
		const struct pipe_image_view *view;
		struct r600_texture *tex;

		unsigned i = u_bit_scan(&mask);

		view = &images->views[i];
		if (view->resource->target == PIPE_BUFFER)
			continue;

		tex = (struct r600_texture *)view->resource;

		si_check_render_feedback_texture(sctx, tex,
						 view->u.tex.level,
						 view->u.tex.level,
						 view->u.tex.first_layer,
						 view->u.tex.last_layer);
	}
}

static void si_check_render_feedback(struct si_context *sctx)
{
	if (!sctx->need_check_render_feedback)
		return;

	for (int i = 0; i < SI_NUM_SHADERS; ++i) {
		si_check_render_feedback_images(sctx, &sctx->images[i]);
		si_check_render_feedback_textures(sctx, &sctx->samplers[i]);
	}
	sctx->need_check_render_feedback = false;
}

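/* Decompress all compressed textures bound to the shaders in shader_mask
 * before they are sampled, and resolve any render feedback loops. */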
static void si_decompress_textures(struct si_context *sctx, unsigned shader_mask)
{
	unsigned compressed_colortex_counter, mask;

	if (sctx->blitter->running)
		return;

	/* Update the compressed_colortex_mask if necessary. */
	compressed_colortex_counter = p_atomic_read(&sctx->screen->b.compressed_colortex_counter);
	if (compressed_colortex_counter != sctx->b.last_compressed_colortex_counter) {
		sctx->b.last_compressed_colortex_counter = compressed_colortex_counter;
		si_update_compressed_colortex_masks(sctx);
	}

	/* Decompress color & depth textures if needed. */
	mask = sctx->compressed_tex_shader_mask & shader_mask;
	while (mask) {
		unsigned i = u_bit_scan(&mask);

		if (sctx->samplers[i].needs_depth_decompress_mask) {
			si_decompress_sampler_depth_textures(sctx, &sctx->samplers[i]);
		}
		if (sctx->samplers[i].needs_color_decompress_mask) {
			si_decompress_sampler_color_textures(sctx, &sctx->samplers[i]);
		}
		if (sctx->images[i].needs_color_decompress_mask) {
			si_decompress_image_color_textures(sctx, &sctx->images[i]);
		}
	}

	si_check_render_feedback(sctx);
}

void si_decompress_graphics_textures(struct si_context *sctx)
{
	si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));
}

void si_decompress_compute_textures(struct si_context *sctx)
{
	si_decompress_textures(sctx, 1 << PIPE_SHADER_COMPUTE);
}

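/* Clear the bound buffers; fast color clears are tried first, and
 * whole-surface depth/stencil clears can use the DB_DEPTH_CLEAR and
 * DB_STENCIL_CLEAR registers when HTILE is present. */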
static void si_clear(struct pipe_context *ctx, unsigned buffers,
		     const union pipe_color_union *color,
		     double depth, unsigned stencil)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
	struct pipe_surface *zsbuf = fb->zsbuf;
	struct r600_texture *zstex =
		zsbuf ? (struct r600_texture*)zsbuf->texture : NULL;

	if (buffers & PIPE_CLEAR_COLOR) {
		evergreen_do_fast_color_clear(&sctx->b, fb,
					      &sctx->framebuffer.atom, &buffers,
					      &sctx->framebuffer.dirty_cbufs,
					      color);
		if (!buffers)
			return; /* all buffers have been fast cleared */
	}

	if (buffers & PIPE_CLEAR_COLOR) {
		int i;

		/* These buffers cannot use fast clear; make sure to disable expansion. */
		for (i = 0; i < fb->nr_cbufs; i++) {
			struct r600_texture *tex;

			/* If not clearing this buffer, skip. */
			if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
				continue;

			if (!fb->cbufs[i])
				continue;

			tex = (struct r600_texture *)fb->cbufs[i]->texture;
			if (tex->fmask.size == 0)
				tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
		}
	}

	if (zstex && zstex->htile_offset &&
	    zsbuf->u.tex.level == 0 &&
	    zsbuf->u.tex.first_layer == 0 &&
	    zsbuf->u.tex.last_layer == util_max_layer(&zstex->resource.b.b, 0)) {
		/* TC-compatible HTILE only supports depth clears to 0 or 1. */
		if (buffers & PIPE_CLEAR_DEPTH &&
		    (!zstex->tc_compatible_htile ||
		     depth == 0 || depth == 1)) {
			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->depth_cleared || zstex->depth_clear_value != depth) {
				sctx->db_depth_disable_expclear = true;
			}

			zstex->depth_clear_value = depth;
			sctx->framebuffer.dirty_zsbuf = true;
			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_DEPTH_CLEAR */
			sctx->db_depth_clear = true;
			si_mark_atom_dirty(sctx, &sctx->db_render_state);
		}

		/* TC-compatible HTILE only supports stencil clears to 0. */
		if (buffers & PIPE_CLEAR_STENCIL &&
		    (!zstex->tc_compatible_htile || stencil == 0)) {
			stencil &= 0xff;

			/* Need to disable EXPCLEAR temporarily if clearing
			 * to a new value. */
			if (!zstex->stencil_cleared || zstex->stencil_clear_value != stencil) {
				sctx->db_stencil_disable_expclear = true;
			}

			zstex->stencil_clear_value = stencil;
			sctx->framebuffer.dirty_zsbuf = true;
			si_mark_atom_dirty(sctx, &sctx->framebuffer.atom); /* updates DB_STENCIL_CLEAR */
			sctx->db_stencil_clear = true;
			si_mark_atom_dirty(sctx, &sctx->db_render_state);
		}
	}

	si_blitter_begin(ctx, SI_CLEAR);
	util_blitter_clear(sctx->blitter, fb->width, fb->height,
			   util_framebuffer_get_num_layers(fb),
			   buffers, color, depth, stencil);
	si_blitter_end(ctx);

	if (sctx->db_depth_clear) {
		sctx->db_depth_clear = false;
		sctx->db_depth_disable_expclear = false;
		zstex->depth_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}

	if (sctx->db_stencil_clear) {
		sctx->db_stencil_clear = false;
		sctx->db_stencil_disable_expclear = false;
		zstex->stencil_cleared = true;
		si_mark_atom_dirty(sctx, &sctx->db_render_state);
	}
}

static void si_clear_render_target(struct pipe_context *ctx,
				   struct pipe_surface *dst,
				   const union pipe_color_union *color,
				   unsigned dstx, unsigned dsty,
				   unsigned width, unsigned height,
				   bool render_condition_enabled)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_clear_render_target(sctx->blitter, dst, color,
					 dstx, dsty, width, height);
	si_blitter_end(ctx);
}

static void si_clear_depth_stencil(struct pipe_context *ctx,
				   struct pipe_surface *dst,
				   unsigned clear_flags,
				   double depth,
				   unsigned stencil,
				   unsigned dstx, unsigned dsty,
				   unsigned width, unsigned height,
				   bool render_condition_enabled)
{
	struct si_context *sctx = (struct si_context *)ctx;

	si_blitter_begin(ctx, SI_CLEAR_SURFACE |
			 (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil,
					 dstx, dsty, width, height);
	si_blitter_end(ctx);
}

/* Helper for decompressing a portion of a color or depth resource before
 * blitting if any decompression is needed.
 * The driver doesn't decompress resources automatically while u_blitter is
 * rendering. */
static void si_decompress_subresource(struct pipe_context *ctx,
				      struct pipe_resource *tex,
				      unsigned planes, unsigned level,
				      unsigned first_layer, unsigned last_layer)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct r600_texture *rtex = (struct r600_texture*)tex;

	if (rtex->db_compatible) {
		planes &= PIPE_MASK_Z | PIPE_MASK_S;

		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
			planes &= ~PIPE_MASK_S;

		si_decompress_depth(sctx, rtex, planes,
				    level, level,
				    first_layer, last_layer);
	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
		si_blit_decompress_color(ctx, rtex, level, level,
					 first_layer, last_layer, false);
	}
}

struct texture_orig_info {
	unsigned format;
	unsigned width0;
	unsigned height0;
	unsigned npix_x;
	unsigned npix_y;
	unsigned npix0_x;
	unsigned npix0_y;
};

void si_resource_copy_region(struct pipe_context *ctx,
			     struct pipe_resource *dst,
			     unsigned dst_level,
			     unsigned dstx, unsigned dsty, unsigned dstz,
			     struct pipe_resource *src,
			     unsigned src_level,
			     const struct pipe_box *src_box)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct r600_texture *rsrc = (struct r600_texture*)src;
	struct pipe_surface *dst_view, dst_templ;
	struct pipe_sampler_view src_templ, *src_view;
	unsigned dst_width, dst_height, src_width0, src_height0;
	unsigned dst_width0, dst_height0, src_force_level = 0;
	struct pipe_box sbox, dstbox;

	/* Handle buffers first. */
	if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
		si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, 0);
		return;
	}

	assert(u_max_sample(dst) == u_max_sample(src));

	/* The driver doesn't decompress resources automatically while
	 * u_blitter is rendering. */
	si_decompress_subresource(ctx, src, PIPE_MASK_RGBAZS, src_level,
				  src_box->z, src_box->z + src_box->depth - 1);

	dst_width = u_minify(dst->width0, dst_level);
	dst_height = u_minify(dst->height0, dst_level);
	dst_width0 = dst->width0;
	dst_height0 = dst->height0;
	src_width0 = src->width0;
	src_height0 = src->height0;

	util_blitter_default_dst_texture(&dst_templ, dst, dst_level, dstz);
	util_blitter_default_src_texture(sctx->blitter, &src_templ, src, src_level);

	if (util_format_is_compressed(src->format) ||
	    util_format_is_compressed(dst->format)) {
		unsigned blocksize = rsrc->surface.bpe;

		if (blocksize == 8)
			src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT; /* 64-bit block */
		else
			src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT; /* 128-bit block */
		dst_templ.format = src_templ.format;

		dst_width = util_format_get_nblocksx(dst->format, dst_width);
		dst_height = util_format_get_nblocksy(dst->format, dst_height);
		dst_width0 = util_format_get_nblocksx(dst->format, dst_width0);
		dst_height0 = util_format_get_nblocksy(dst->format, dst_height0);
		src_width0 = util_format_get_nblocksx(src->format, src_width0);
		src_height0 = util_format_get_nblocksy(src->format, src_height0);

		dstx = util_format_get_nblocksx(dst->format, dstx);
		dsty = util_format_get_nblocksy(dst->format, dsty);

		sbox.x = util_format_get_nblocksx(src->format, src_box->x);
		sbox.y = util_format_get_nblocksy(src->format, src_box->y);
		sbox.z = src_box->z;
		sbox.width = util_format_get_nblocksx(src->format, src_box->width);
		sbox.height = util_format_get_nblocksy(src->format, src_box->height);
		sbox.depth = src_box->depth;
		src_box = &sbox;

		src_force_level = src_level;
	} else if (!util_blitter_is_copy_supported(sctx->blitter, dst, src)) {
		if (util_format_is_subsampled_422(src->format)) {
			src_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;
			dst_templ.format = PIPE_FORMAT_R8G8B8A8_UINT;

			dst_width = util_format_get_nblocksx(dst->format, dst_width);
			dst_width0 = util_format_get_nblocksx(dst->format, dst_width0);
			src_width0 = util_format_get_nblocksx(src->format, src_width0);

			dstx = util_format_get_nblocksx(dst->format, dstx);

			sbox = *src_box;
			sbox.x = util_format_get_nblocksx(src->format, src_box->x);
			sbox.width = util_format_get_nblocksx(src->format, src_box->width);
			src_box = &sbox;
		} else {
			unsigned blocksize = rsrc->surface.bpe;

			switch (blocksize) {
			case 1:
				dst_templ.format = PIPE_FORMAT_R8_UNORM;
				src_templ.format = PIPE_FORMAT_R8_UNORM;
				break;
			case 2:
				dst_templ.format = PIPE_FORMAT_R8G8_UNORM;
				src_templ.format = PIPE_FORMAT_R8G8_UNORM;
				break;
			case 4:
				dst_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
				src_templ.format = PIPE_FORMAT_R8G8B8A8_UNORM;
				break;
			case 8:
				dst_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
				src_templ.format = PIPE_FORMAT_R16G16B16A16_UINT;
				break;
			case 16:
				dst_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
				src_templ.format = PIPE_FORMAT_R32G32B32A32_UINT;
				break;
			default:
				fprintf(stderr, "Unhandled format %s with blocksize %u\n",
					util_format_short_name(src->format), blocksize);
				assert(0);
			}
		}
	}

	vi_disable_dcc_if_incompatible_format(&sctx->b, dst, dst_level,
					      dst_templ.format);
	vi_disable_dcc_if_incompatible_format(&sctx->b, src, src_level,
					      src_templ.format);

	/* Initialize the surface. */
	dst_view = r600_create_surface_custom(ctx, dst, &dst_templ,
					      dst_width0, dst_height0,
					      dst_width, dst_height);

	/* Initialize the sampler view. */
	src_view = si_create_sampler_view_custom(ctx, src, &src_templ,
						 src_width0, src_height0,
						 src_force_level);

	u_box_3d(dstx, dsty, dstz, abs(src_box->width), abs(src_box->height),
		 abs(src_box->depth), &dstbox);

	/* Copy. */
	si_blitter_begin(ctx, SI_COPY);
	util_blitter_blit_generic(sctx->blitter, dst_view, &dstbox,
				  src_view, src_box, src_width0, src_height0,
				  PIPE_MASK_RGBAZS, PIPE_TEX_FILTER_NEAREST, NULL,
				  false);
	si_blitter_end(ctx);

	pipe_surface_reference(&dst_view, NULL);
	pipe_sampler_view_reference(&src_view, NULL);
}

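/* Resolve MSAA color with the CB hardware resolve path, either directly into
 * dst or via a temporary texture plus a blit. Returns false if the source
 * doesn't meet the basic requirements, in which case the caller falls back
 * to u_blitter. */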
static bool do_hardware_msaa_resolve(struct pipe_context *ctx,
				     const struct pipe_blit_info *info)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct r600_texture *src = (struct r600_texture*)info->src.resource;
	struct r600_texture *dst = (struct r600_texture*)info->dst.resource;
	MAYBE_UNUSED struct r600_texture *rtmp;
	unsigned dst_width = u_minify(info->dst.resource->width0, info->dst.level);
	unsigned dst_height = u_minify(info->dst.resource->height0, info->dst.level);
	enum pipe_format format = info->src.format;
	unsigned sample_mask = ~0;
	struct pipe_resource *tmp, templ;
	struct pipe_blit_info blit;

	/* Check basic requirements for hw resolve. */
	if (!(info->src.resource->nr_samples > 1 &&
	      info->dst.resource->nr_samples <= 1 &&
	      !util_format_is_pure_integer(format) &&
	      !util_format_is_depth_or_stencil(format) &&
	      util_max_layer(info->src.resource, 0) == 0))
		return false;

	/* Hardware MSAA resolve doesn't work if SPI format = NORM16_ABGR and
	 * the format is R16G16. Use R16A16, which does work.
	 */
	if (format == PIPE_FORMAT_R16G16_UNORM)
		format = PIPE_FORMAT_R16A16_UNORM;
	if (format == PIPE_FORMAT_R16G16_SNORM)
		format = PIPE_FORMAT_R16A16_SNORM;

	/* Check the remaining requirements for hw resolve. */
	if (util_max_layer(info->dst.resource, info->dst.level) == 0 &&
	    !info->scissor_enable &&
	    (info->mask & PIPE_MASK_RGBA) == PIPE_MASK_RGBA &&
	    util_is_format_compatible(util_format_description(info->src.format),
				      util_format_description(info->dst.format)) &&
	    dst_width == info->src.resource->width0 &&
	    dst_height == info->src.resource->height0 &&
	    info->dst.box.x == 0 &&
	    info->dst.box.y == 0 &&
	    info->dst.box.width == dst_width &&
	    info->dst.box.height == dst_height &&
	    info->dst.box.depth == 1 &&
	    info->src.box.x == 0 &&
	    info->src.box.y == 0 &&
	    info->src.box.width == dst_width &&
	    info->src.box.height == dst_height &&
	    info->src.box.depth == 1 &&
	    !dst->surface.is_linear &&
	    (!dst->cmask.size || !dst->dirty_level_mask)) { /* dst cannot be fast-cleared */
		/* Check the last constraint. */
		if (src->surface.micro_tile_mode != dst->surface.micro_tile_mode) {
			/* The next fast clear will switch to this mode to
			 * get direct hw resolve next time if the mode is
			 * different now.
			 */
			src->last_msaa_resolve_target_micro_mode =
				dst->surface.micro_tile_mode;
			goto resolve_to_temp;
		}

		/* Resolving into a surface with DCC is unsupported. Since
		 * it's being overwritten anyway, clear it to uncompressed.
		 * This is still the fastest codepath even with this clear.
		 */
		if (vi_dcc_enabled(dst, info->dst.level)) {
			/* TODO: Implement per-level DCC clears for GFX9. */
			if (sctx->b.chip_class >= GFX9 &&
			    info->dst.resource->last_level != 0)
				goto resolve_to_temp;

			vi_dcc_clear_level(&sctx->b, dst, info->dst.level,
					   0xFFFFFFFF);
			dst->dirty_level_mask &= ~(1 << info->dst.level);
		}

		/* Resolve directly from src to dst. */
		si_blitter_begin(ctx, SI_COLOR_RESOLVE |
				 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
		util_blitter_custom_resolve_color(sctx->blitter,
						  info->dst.resource, info->dst.level,
						  info->dst.box.z,
						  info->src.resource, info->src.box.z,
						  sample_mask, sctx->custom_blend_resolve,
						  format);
		si_blitter_end(ctx);
		return true;
	}

resolve_to_temp:
	/* Shader-based resolve is VERY SLOW. Instead, resolve into
	 * a temporary texture and blit.
	 */
	memset(&templ, 0, sizeof(templ));
	templ.target = PIPE_TEXTURE_2D;
	templ.format = info->src.resource->format;
	templ.width0 = info->src.resource->width0;
	templ.height0 = info->src.resource->height0;
	templ.depth0 = 1;
	templ.array_size = 1;
	templ.usage = PIPE_USAGE_DEFAULT;
	templ.flags = R600_RESOURCE_FLAG_FORCE_TILING |
		      R600_RESOURCE_FLAG_DISABLE_DCC;

	/* The src and dst microtile modes must be the same. */
	if (src->surface.micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
		templ.bind = PIPE_BIND_SCANOUT;
	else
		templ.bind = 0;

	tmp = ctx->screen->resource_create(ctx->screen, &templ);
	if (!tmp)
		return false;
	rtmp = (struct r600_texture*)tmp;

	assert(!rtmp->surface.is_linear);
	assert(src->surface.micro_tile_mode == rtmp->surface.micro_tile_mode);

	/* resolve */
	si_blitter_begin(ctx, SI_COLOR_RESOLVE |
			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_custom_resolve_color(sctx->blitter, tmp, 0, 0,
					  info->src.resource, info->src.box.z,
					  sample_mask, sctx->custom_blend_resolve,
					  format);
	si_blitter_end(ctx);

	/* blit */
	blit = *info;
	blit.src.resource = tmp;
	blit.src.box.z = 0;

	si_blitter_begin(ctx, SI_BLIT |
			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_blit(sctx->blitter, &blit);
	si_blitter_end(ctx);

	pipe_resource_reference(&tmp, NULL);
	return true;
}

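/* The main blit entry point: tries HW MSAA resolve first, then an SDMA copy
 * for linear destinations (the fast DRI PRIME path), and finally u_blitter. */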
static void si_blit(struct pipe_context *ctx,
		    const struct pipe_blit_info *info)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct r600_texture *rdst = (struct r600_texture *)info->dst.resource;

	if (do_hardware_msaa_resolve(ctx, info)) {
		return;
	}

	/* Using SDMA for copying to a linear texture in GTT is much faster.
	 * This improves DRI PRIME performance.
	 *
	 * resource_copy_region can't do this yet, because dma_copy calls it
	 * on failure (recursion).
	 */
	if (rdst->surface.is_linear &&
	    sctx->b.dma_copy &&
	    util_can_blit_via_copy_region(info, false)) {
		sctx->b.dma_copy(ctx, info->dst.resource, info->dst.level,
				 info->dst.box.x, info->dst.box.y,
				 info->dst.box.z,
				 info->src.resource, info->src.level,
				 &info->src.box);
		return;
	}

	assert(util_blitter_is_blit_supported(sctx->blitter, info));

	/* The driver doesn't decompress resources automatically while
	 * u_blitter is rendering. */
	vi_disable_dcc_if_incompatible_format(&sctx->b, info->src.resource,
					      info->src.level,
					      info->src.format);
	vi_disable_dcc_if_incompatible_format(&sctx->b, info->dst.resource,
					      info->dst.level,
					      info->dst.format);
	si_decompress_subresource(ctx, info->src.resource, info->mask,
				  info->src.level,
				  info->src.box.z,
				  info->src.box.z + info->src.box.depth - 1);

	if (sctx->screen->b.debug_flags & DBG_FORCE_DMA &&
	    util_try_blit_via_copy_region(ctx, info))
		return;

	si_blitter_begin(ctx, SI_BLIT |
			 (info->render_condition_enable ? 0 : SI_DISABLE_RENDER_COND));
	util_blitter_blit(sctx->blitter, info);
	si_blitter_end(ctx);
}

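/* Generate mipmaps with u_blitter after decompressing the base level and
 * marking the soon-to-be-overwritten levels as non-dirty. */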
static boolean si_generate_mipmap(struct pipe_context *ctx,
				  struct pipe_resource *tex,
				  enum pipe_format format,
				  unsigned base_level, unsigned last_level,
				  unsigned first_layer, unsigned last_layer)
{
	struct si_context *sctx = (struct si_context*)ctx;
	struct r600_texture *rtex = (struct r600_texture *)tex;

	if (!util_blitter_is_copy_supported(sctx->blitter, tex, tex))
		return false;

	/* The driver doesn't decompress resources automatically while
	 * u_blitter is rendering. */
	vi_disable_dcc_if_incompatible_format(&sctx->b, tex, base_level,
					      format);
	si_decompress_subresource(ctx, tex, PIPE_MASK_RGBAZS,
				  base_level, first_layer, last_layer);

	/* Clear dirty_level_mask for the levels that will be overwritten. */
	assert(base_level < last_level);
	rtex->dirty_level_mask &= ~u_bit_consecutive(base_level + 1,
						     last_level - base_level);

	si_blitter_begin(ctx, SI_BLIT | SI_DISABLE_RENDER_COND);
	util_blitter_generate_mipmap(sctx->blitter, tex, format,
				     base_level, last_level,
				     first_layer, last_layer);
	si_blitter_end(ctx);
	return true;
}

static void si_flush_resource(struct pipe_context *ctx,
			      struct pipe_resource *res)
{
	struct r600_texture *rtex = (struct r600_texture*)res;

	assert(res->target != PIPE_BUFFER);
	assert(!rtex->dcc_separate_buffer || rtex->dcc_gather_statistics);

	/* st/dri calls flush twice per frame (not a bug), this prevents double
	 * decompression. */
	if (rtex->dcc_separate_buffer && !rtex->separate_dcc_dirty)
		return;

	if (!rtex->is_depth && (rtex->cmask.size || rtex->dcc_offset)) {
		si_blit_decompress_color(ctx, rtex, 0, res->last_level,
					 0, util_max_layer(res, 0),
					 rtex->dcc_separate_buffer != NULL);
	}

	/* Always do the analysis even if DCC is disabled at the moment. */
	if (rtex->dcc_gather_statistics && rtex->separate_dcc_dirty) {
		rtex->separate_dcc_dirty = false;
		vi_separate_dcc_process_and_reset_stats(ctx, rtex);
	}
}

static void si_decompress_dcc(struct pipe_context *ctx,
			      struct r600_texture *rtex)
{
	if (!rtex->dcc_offset)
		return;

	si_blit_decompress_color(ctx, rtex, 0, rtex->resource.b.b.last_level,
				 0, util_max_layer(&rtex->resource.b.b, 0),
				 true);
}

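/* Clear a buffer: small clear values are expanded to a dword for the
 * clear_buffer path, while non-repeating 64/96/128-bit values fall back to a
 * transform-feedback-based clear via u_blitter. */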
static void si_pipe_clear_buffer(struct pipe_context *ctx,
				 struct pipe_resource *dst,
				 unsigned offset, unsigned size,
				 const void *clear_value_ptr,
				 int clear_value_size)
{
	struct si_context *sctx = (struct si_context*)ctx;
	uint32_t dword_value;
	unsigned i;

	assert(offset % clear_value_size == 0);
	assert(size % clear_value_size == 0);

	if (clear_value_size > 4) {
		const uint32_t *u32 = clear_value_ptr;
		bool clear_dword_duplicated = true;

		/* See if we can lower large fills to dword fills. */
		for (i = 1; i < clear_value_size / 4; i++)
			if (u32[0] != u32[i]) {
				clear_dword_duplicated = false;
				break;
			}

		if (!clear_dword_duplicated) {
			/* Use transform feedback for 64-bit, 96-bit, and
			 * 128-bit fills.
			 */
			union pipe_color_union clear_value;

			memcpy(&clear_value, clear_value_ptr, clear_value_size);
			si_blitter_begin(ctx, SI_DISABLE_RENDER_COND);
			util_blitter_clear_buffer(sctx->blitter, dst, offset,
						  size, clear_value_size / 4,
						  &clear_value);
			si_blitter_end(ctx);
			return;
		}
	}

	/* Expand the clear value to a dword. */
	switch (clear_value_size) {
	case 1:
		dword_value = *(uint8_t*)clear_value_ptr;
		dword_value |= (dword_value << 8) |
			       (dword_value << 16) |
			       (dword_value << 24);
		break;
	case 2:
		dword_value = *(uint16_t*)clear_value_ptr;
		dword_value |= dword_value << 16;
		break;
	default:
		dword_value = *(uint32_t*)clear_value_ptr;
	}

	sctx->b.clear_buffer(ctx, dst, offset, size, dword_value,
			     R600_COHERENCY_SHADER);
}

void si_init_blit_functions(struct si_context *sctx)
{
	sctx->b.b.clear = si_clear;
	sctx->b.b.clear_buffer = si_pipe_clear_buffer;
	sctx->b.b.clear_render_target = si_clear_render_target;
	sctx->b.b.clear_depth_stencil = si_clear_depth_stencil;
	sctx->b.b.resource_copy_region = si_resource_copy_region;
	sctx->b.b.blit = si_blit;
	sctx->b.b.flush_resource = si_flush_resource;
	sctx->b.b.generate_mipmap = si_generate_mipmap;
	sctx->b.blit_decompress_depth = si_blit_decompress_depth;
	sctx->b.decompress_dcc = si_decompress_dcc;
}