r600: fork and import gallium/radeon
[mesa.git] / src/gallium/drivers/r600/r600_texture.c
1 /*
2 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * Authors:
24 * Jerome Glisse
25 * Corbin Simpson
26 */
27 #include "r600_pipe_common.h"
28 #include "r600_cs.h"
29 #include "r600_query.h"
30 #include "util/u_format.h"
31 #include "util/u_log.h"
32 #include "util/u_memory.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_surface.h"
35 #include "os/os_time.h"
36 #include <errno.h>
37 #include <inttypes.h>
38
39 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
40 struct r600_texture *rtex);
41 static enum radeon_surf_mode
42 r600_choose_tiling(struct r600_common_screen *rscreen,
43 const struct pipe_resource *templ);
44
45
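/* Check whether a copy between two textures can go through the SDMA (async
 * DMA) ring and, if so, prepare both textures for it: a fully overwritten
 * destination has its CMASK discarded and a compressed source is flushed.
 * Returns false when the blit must take the 3D (graphics) path instead.
 */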
46 bool r600_prepare_for_dma_blit(struct r600_common_context *rctx,
47 struct r600_texture *rdst,
48 unsigned dst_level, unsigned dstx,
49 unsigned dsty, unsigned dstz,
50 struct r600_texture *rsrc,
51 unsigned src_level,
52 const struct pipe_box *src_box)
53 {
54 if (!rctx->dma.cs)
55 return false;
56
57 if (rdst->surface.bpe != rsrc->surface.bpe)
58 return false;
59
60 /* MSAA: Blits don't exist in the real world. */
61 if (rsrc->resource.b.b.nr_samples > 1 ||
62 rdst->resource.b.b.nr_samples > 1)
63 return false;
64
65 /* Depth-stencil surfaces:
66 * When dst is linear, the DB->CB copy preserves HTILE.
67 * When dst is tiled, the 3D path must be used to update HTILE.
68 */
69 if (rsrc->is_depth || rdst->is_depth)
70 return false;
71
72 /* CMASK as:
73 * src: Both texture and SDMA paths need decompression. Use SDMA.
74 * dst: If overwriting the whole texture, discard CMASK and use
75 * SDMA. Otherwise, use the 3D path.
76 */
77 if (rdst->cmask.size && rdst->dirty_level_mask & (1 << dst_level)) {
78 /* The CMASK clear is only enabled for the first level. */
79 assert(dst_level == 0);
80 if (!util_texrange_covers_whole_level(&rdst->resource.b.b, dst_level,
81 dstx, dsty, dstz, src_box->width,
82 src_box->height, src_box->depth))
83 return false;
84
85 r600_texture_discard_cmask(rctx->screen, rdst);
86 }
87
88 /* All requirements are met. Prepare textures for SDMA. */
89 if (rsrc->cmask.size && rsrc->dirty_level_mask & (1 << src_level))
90 rctx->b.flush_resource(&rctx->b, &rsrc->resource.b.b);
91
92 assert(!(rsrc->dirty_level_mask & (1 << src_level)));
93 assert(!(rdst->dirty_level_mask & (1 << dst_level)));
94
95 return true;
96 }
97
98 /* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
99 static void r600_copy_region_with_blit(struct pipe_context *pipe,
100 struct pipe_resource *dst,
101 unsigned dst_level,
102 unsigned dstx, unsigned dsty, unsigned dstz,
103 struct pipe_resource *src,
104 unsigned src_level,
105 const struct pipe_box *src_box)
106 {
107 struct pipe_blit_info blit;
108
109 memset(&blit, 0, sizeof(blit));
110 blit.src.resource = src;
111 blit.src.format = src->format;
112 blit.src.level = src_level;
113 blit.src.box = *src_box;
114 blit.dst.resource = dst;
115 blit.dst.format = dst->format;
116 blit.dst.level = dst_level;
117 blit.dst.box.x = dstx;
118 blit.dst.box.y = dsty;
119 blit.dst.box.z = dstz;
120 blit.dst.box.width = src_box->width;
121 blit.dst.box.height = src_box->height;
122 blit.dst.box.depth = src_box->depth;
123 blit.mask = util_format_get_mask(src->format) &
124 util_format_get_mask(dst->format);
125 blit.filter = PIPE_TEX_FILTER_NEAREST;
126
127 if (blit.mask) {
128 pipe->blit(pipe, &blit);
129 }
130 }
131
132 /* Copy from a full GPU texture to a transfer's staging one. */
133 static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
134 {
135 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
136 struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
137 struct pipe_resource *dst = &rtransfer->staging->b.b;
138 struct pipe_resource *src = transfer->resource;
139
140 if (src->nr_samples > 1) {
141 r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
142 src, transfer->level, &transfer->box);
143 return;
144 }
145
146 rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
147 &transfer->box);
148 }
149
150 /* Copy from a transfer's staging texture to a full GPU one. */
151 static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
152 {
153 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
154 struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
155 struct pipe_resource *dst = transfer->resource;
156 struct pipe_resource *src = &rtransfer->staging->b.b;
157 struct pipe_box sbox;
158
159 u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);
160
161 if (dst->nr_samples > 1) {
162 r600_copy_region_with_blit(ctx, dst, transfer->level,
163 transfer->box.x, transfer->box.y, transfer->box.z,
164 src, 0, &sbox);
165 return;
166 }
167
168 rctx->dma_copy(ctx, dst, transfer->level,
169 transfer->box.x, transfer->box.y, transfer->box.z,
170 src, 0, &sbox);
171 }
172
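/* Return the byte offset of "box" within mipmap "level" and report the
 * level's row stride and layer stride in bytes. With a NULL box, only the
 * strides are filled in (and the level's base offset is returned on
 * pre-GFX9 chips).
 */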
173 static unsigned r600_texture_get_offset(struct r600_common_screen *rscreen,
174 struct r600_texture *rtex, unsigned level,
175 const struct pipe_box *box,
176 unsigned *stride,
177 unsigned *layer_stride)
178 {
179 if (rscreen->chip_class >= GFX9) {
180 *stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
181 *layer_stride = rtex->surface.u.gfx9.surf_slice_size;
182
183 if (!box)
184 return 0;
185
186 /* Each texture is an array of slices. Each slice is an array
187 * of mipmap levels. */
188 return box->z * rtex->surface.u.gfx9.surf_slice_size +
189 rtex->surface.u.gfx9.offset[level] +
190 (box->y / rtex->surface.blk_h *
191 rtex->surface.u.gfx9.surf_pitch +
192 box->x / rtex->surface.blk_w) * rtex->surface.bpe;
193 } else {
194 *stride = rtex->surface.u.legacy.level[level].nblk_x *
195 rtex->surface.bpe;
196 *layer_stride = rtex->surface.u.legacy.level[level].slice_size;
197
198 if (!box)
199 return rtex->surface.u.legacy.level[level].offset;
200
201 /* Each texture is an array of mipmap levels. Each level is
202 * an array of slices. */
203 return rtex->surface.u.legacy.level[level].offset +
204 box->z * rtex->surface.u.legacy.level[level].slice_size +
205 (box->y / rtex->surface.blk_h *
206 rtex->surface.u.legacy.level[level].nblk_x +
207 box->x / rtex->surface.blk_w) * rtex->surface.bpe;
208 }
209 }
210
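/* Translate the pipe_resource template into radeon_surf flags (Z/S buffer,
 * TC-compatible HTILE, DCC disable, scanout, shareable) and let the winsys
 * compute the surface layout. Imported surfaces may override the pitch and
 * apply a starting offset afterwards.
 */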
211 static int r600_init_surface(struct r600_common_screen *rscreen,
212 struct radeon_surf *surface,
213 const struct pipe_resource *ptex,
214 enum radeon_surf_mode array_mode,
215 unsigned pitch_in_bytes_override,
216 unsigned offset,
217 bool is_imported,
218 bool is_scanout,
219 bool is_flushed_depth,
220 bool tc_compatible_htile)
221 {
222 const struct util_format_description *desc =
223 util_format_description(ptex->format);
224 bool is_depth, is_stencil;
225 int r;
226 unsigned i, bpe, flags = 0;
227
228 is_depth = util_format_has_depth(desc);
229 is_stencil = util_format_has_stencil(desc);
230
231 if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
232 ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
233 bpe = 4; /* stencil is allocated separately on evergreen */
234 } else {
235 bpe = util_format_get_blocksize(ptex->format);
236 assert(util_is_power_of_two(bpe));
237 }
238
239 if (!is_flushed_depth && is_depth) {
240 flags |= RADEON_SURF_ZBUFFER;
241
242 if (tc_compatible_htile &&
243 (rscreen->chip_class >= GFX9 ||
244 array_mode == RADEON_SURF_MODE_2D)) {
245 /* TC-compatible HTILE only supports Z32_FLOAT.
246 * GFX9 also supports Z16_UNORM.
247 * On VI, promote Z16 to Z32. DB->CB copies will convert
248 * the format for transfers.
249 */
250 if (rscreen->chip_class == VI)
251 bpe = 4;
252
253 flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
254 }
255
256 if (is_stencil)
257 flags |= RADEON_SURF_SBUFFER;
258 }
259
260 if (rscreen->chip_class >= VI &&
261 (ptex->flags & R600_RESOURCE_FLAG_DISABLE_DCC ||
262 ptex->format == PIPE_FORMAT_R9G9B9E5_FLOAT))
263 flags |= RADEON_SURF_DISABLE_DCC;
264
265 if (ptex->bind & PIPE_BIND_SCANOUT || is_scanout) {
266 /* This should catch bugs in gallium users setting incorrect flags. */
267 assert(ptex->nr_samples <= 1 &&
268 ptex->array_size == 1 &&
269 ptex->depth0 == 1 &&
270 ptex->last_level == 0 &&
271 !(flags & RADEON_SURF_Z_OR_SBUFFER));
272
273 flags |= RADEON_SURF_SCANOUT;
274 }
275
276 if (ptex->bind & PIPE_BIND_SHARED)
277 flags |= RADEON_SURF_SHAREABLE;
278 if (is_imported)
279 flags |= RADEON_SURF_IMPORTED | RADEON_SURF_SHAREABLE;
280 if (!(ptex->flags & R600_RESOURCE_FLAG_FORCE_TILING))
281 flags |= RADEON_SURF_OPTIMIZE_FOR_SPACE;
282
283 r = rscreen->ws->surface_init(rscreen->ws, ptex, flags, bpe,
284 array_mode, surface);
285 if (r) {
286 return r;
287 }
288
289 if (rscreen->chip_class >= GFX9) {
290 assert(!pitch_in_bytes_override ||
291 pitch_in_bytes_override == surface->u.gfx9.surf_pitch * bpe);
292 surface->u.gfx9.surf_offset = offset;
293 } else {
294 if (pitch_in_bytes_override &&
295 pitch_in_bytes_override != surface->u.legacy.level[0].nblk_x * bpe) {
296 			/* The old DDX on Evergreen overestimates the alignment for
297 			 * 1D surfaces; those have only one level.
298 			 */
299 surface->u.legacy.level[0].nblk_x = pitch_in_bytes_override / bpe;
300 surface->u.legacy.level[0].slice_size = pitch_in_bytes_override *
301 surface->u.legacy.level[0].nblk_y;
302 }
303
304 if (offset) {
305 for (i = 0; i < ARRAY_SIZE(surface->u.legacy.level); ++i)
306 surface->u.legacy.level[i].offset += offset;
307 }
308 }
309 return 0;
310 }
311
312 static void r600_texture_init_metadata(struct r600_common_screen *rscreen,
313 struct r600_texture *rtex,
314 struct radeon_bo_metadata *metadata)
315 {
316 struct radeon_surf *surface = &rtex->surface;
317
318 memset(metadata, 0, sizeof(*metadata));
319
320 if (rscreen->chip_class >= GFX9) {
321 metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
322 } else {
323 metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
324 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
325 metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
326 RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
327 metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
328 metadata->u.legacy.bankw = surface->u.legacy.bankw;
329 metadata->u.legacy.bankh = surface->u.legacy.bankh;
330 metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
331 metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
332 metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
333 metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
334 metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
335 }
336 }
337
338 static void r600_surface_import_metadata(struct r600_common_screen *rscreen,
339 struct radeon_surf *surf,
340 struct radeon_bo_metadata *metadata,
341 enum radeon_surf_mode *array_mode,
342 bool *is_scanout)
343 {
344 if (rscreen->chip_class >= GFX9) {
345 if (metadata->u.gfx9.swizzle_mode > 0)
346 *array_mode = RADEON_SURF_MODE_2D;
347 else
348 *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
349
350 *is_scanout = metadata->u.gfx9.swizzle_mode == 0 ||
351 metadata->u.gfx9.swizzle_mode % 4 == 2;
352
353 surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
354 } else {
355 surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
356 surf->u.legacy.bankw = metadata->u.legacy.bankw;
357 surf->u.legacy.bankh = metadata->u.legacy.bankh;
358 surf->u.legacy.tile_split = metadata->u.legacy.tile_split;
359 surf->u.legacy.mtilea = metadata->u.legacy.mtilea;
360 surf->u.legacy.num_banks = metadata->u.legacy.num_banks;
361
362 if (metadata->u.legacy.macrotile == RADEON_LAYOUT_TILED)
363 *array_mode = RADEON_SURF_MODE_2D;
364 else if (metadata->u.legacy.microtile == RADEON_LAYOUT_TILED)
365 *array_mode = RADEON_SURF_MODE_1D;
366 else
367 *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
368
369 *is_scanout = metadata->u.legacy.scanout;
370 }
371 }
372
373 static void r600_eliminate_fast_color_clear(struct r600_common_context *rctx,
374 struct r600_texture *rtex)
375 {
376 struct r600_common_screen *rscreen = rctx->screen;
377 struct pipe_context *ctx = &rctx->b;
378
379 if (ctx == rscreen->aux_context)
380 mtx_lock(&rscreen->aux_context_lock);
381
382 ctx->flush_resource(ctx, &rtex->resource.b.b);
383 ctx->flush(ctx, NULL, 0);
384
385 if (ctx == rscreen->aux_context)
386 mtx_unlock(&rscreen->aux_context_lock);
387 }
388
389 static void r600_texture_discard_cmask(struct r600_common_screen *rscreen,
390 struct r600_texture *rtex)
391 {
392 if (!rtex->cmask.size)
393 return;
394
395 assert(rtex->resource.b.b.nr_samples <= 1);
396
397 /* Disable CMASK. */
398 memset(&rtex->cmask, 0, sizeof(rtex->cmask));
399 rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;
400 rtex->dirty_level_mask = 0;
401
402 if (rscreen->chip_class >= SI)
403 rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
404 else
405 rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);
406
407 if (rtex->cmask_buffer != &rtex->resource)
408 r600_resource_reference(&rtex->cmask_buffer, NULL);
409
410 /* Notify all contexts about the change. */
411 p_atomic_inc(&rscreen->dirty_tex_counter);
412 p_atomic_inc(&rscreen->compressed_colortex_counter);
413 }
414
415 static bool r600_can_disable_dcc(struct r600_texture *rtex)
416 {
417 /* We can't disable DCC if it can be written by another process. */
418 return rtex->dcc_offset &&
419 (!rtex->resource.b.is_shared ||
420 !(rtex->resource.external_usage & PIPE_HANDLE_USAGE_WRITE));
421 }
422
423 static bool r600_texture_discard_dcc(struct r600_common_screen *rscreen,
424 struct r600_texture *rtex)
425 {
426 if (!r600_can_disable_dcc(rtex))
427 return false;
428
429 assert(rtex->dcc_separate_buffer == NULL);
430
431 /* Disable DCC. */
432 rtex->dcc_offset = 0;
433
434 /* Notify all contexts about the change. */
435 p_atomic_inc(&rscreen->dirty_tex_counter);
436 return true;
437 }
438
439 /**
440 * Disable DCC for the texture. (first decompress, then discard metadata).
441 *
442  * There is an unresolved multi-context synchronization issue between
443 * screen::aux_context and the current context. If applications do this with
444 * multiple contexts, it's already undefined behavior for them and we don't
445 * have to worry about that. The scenario is:
446 *
447 * If context 1 disables DCC and context 2 has queued commands that write
448 * to the texture via CB with DCC enabled, and the order of operations is
449 * as follows:
450 * context 2 queues draw calls rendering to the texture, but doesn't flush
451 * context 1 disables DCC and flushes
452 * context 1 & 2 reset descriptors and FB state
453 * context 2 flushes (new compressed tiles written by the draw calls)
454 * context 1 & 2 read garbage, because DCC is disabled, yet there are
455  *   compressed tiles.
456 *
457 * \param rctx the current context if you have one, or rscreen->aux_context
458 * if you don't.
459 */
460 bool r600_texture_disable_dcc(struct r600_common_context *rctx,
461 struct r600_texture *rtex)
462 {
463 struct r600_common_screen *rscreen = rctx->screen;
464
465 if (!r600_can_disable_dcc(rtex))
466 return false;
467
468 if (&rctx->b == rscreen->aux_context)
469 mtx_lock(&rscreen->aux_context_lock);
470
471 /* Decompress DCC. */
472 rctx->decompress_dcc(&rctx->b, rtex);
473 rctx->b.flush(&rctx->b, NULL, 0);
474
475 if (&rctx->b == rscreen->aux_context)
476 mtx_unlock(&rscreen->aux_context_lock);
477
478 return r600_texture_discard_dcc(rscreen, rtex);
479 }
480
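/* Recreate the texture with an additional bind flag (PIPE_BIND_LINEAR or
 * PIPE_BIND_SHARED), optionally copy the old contents over, and transplant
 * the new storage and layout into the existing r600_texture so that all
 * pipe_resource pointers stay valid.
 */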
481 static void r600_reallocate_texture_inplace(struct r600_common_context *rctx,
482 struct r600_texture *rtex,
483 unsigned new_bind_flag,
484 bool invalidate_storage)
485 {
486 struct pipe_screen *screen = rctx->b.screen;
487 struct r600_texture *new_tex;
488 struct pipe_resource templ = rtex->resource.b.b;
489 unsigned i;
490
491 templ.bind |= new_bind_flag;
492
493 /* r600g doesn't react to dirty_tex_descriptor_counter */
494 if (rctx->chip_class < SI)
495 return;
496
497 if (rtex->resource.b.is_shared)
498 return;
499
500 if (new_bind_flag == PIPE_BIND_LINEAR) {
501 if (rtex->surface.is_linear)
502 return;
503
504 /* This fails with MSAA, depth, and compressed textures. */
505 if (r600_choose_tiling(rctx->screen, &templ) !=
506 RADEON_SURF_MODE_LINEAR_ALIGNED)
507 return;
508 }
509
510 new_tex = (struct r600_texture*)screen->resource_create(screen, &templ);
511 if (!new_tex)
512 return;
513
514 /* Copy the pixels to the new texture. */
515 if (!invalidate_storage) {
516 for (i = 0; i <= templ.last_level; i++) {
517 struct pipe_box box;
518
519 u_box_3d(0, 0, 0,
520 u_minify(templ.width0, i), u_minify(templ.height0, i),
521 util_max_layer(&templ, i) + 1, &box);
522
523 rctx->dma_copy(&rctx->b, &new_tex->resource.b.b, i, 0, 0, 0,
524 &rtex->resource.b.b, i, &box);
525 }
526 }
527
528 if (new_bind_flag == PIPE_BIND_LINEAR) {
529 r600_texture_discard_cmask(rctx->screen, rtex);
530 r600_texture_discard_dcc(rctx->screen, rtex);
531 }
532
533 /* Replace the structure fields of rtex. */
534 rtex->resource.b.b.bind = templ.bind;
535 pb_reference(&rtex->resource.buf, new_tex->resource.buf);
536 rtex->resource.gpu_address = new_tex->resource.gpu_address;
537 rtex->resource.vram_usage = new_tex->resource.vram_usage;
538 rtex->resource.gart_usage = new_tex->resource.gart_usage;
539 rtex->resource.bo_size = new_tex->resource.bo_size;
540 rtex->resource.bo_alignment = new_tex->resource.bo_alignment;
541 rtex->resource.domains = new_tex->resource.domains;
542 rtex->resource.flags = new_tex->resource.flags;
543 rtex->size = new_tex->size;
544 rtex->db_render_format = new_tex->db_render_format;
545 rtex->db_compatible = new_tex->db_compatible;
546 rtex->can_sample_z = new_tex->can_sample_z;
547 rtex->can_sample_s = new_tex->can_sample_s;
548 rtex->surface = new_tex->surface;
549 rtex->fmask = new_tex->fmask;
550 rtex->cmask = new_tex->cmask;
551 rtex->cb_color_info = new_tex->cb_color_info;
552 rtex->last_msaa_resolve_target_micro_mode = new_tex->last_msaa_resolve_target_micro_mode;
553 rtex->htile_offset = new_tex->htile_offset;
554 rtex->tc_compatible_htile = new_tex->tc_compatible_htile;
555 rtex->depth_cleared = new_tex->depth_cleared;
556 rtex->stencil_cleared = new_tex->stencil_cleared;
557 rtex->non_disp_tiling = new_tex->non_disp_tiling;
558 rtex->dcc_gather_statistics = new_tex->dcc_gather_statistics;
559 rtex->framebuffers_bound = new_tex->framebuffers_bound;
560
561 if (new_bind_flag == PIPE_BIND_LINEAR) {
562 assert(!rtex->htile_offset);
563 assert(!rtex->cmask.size);
564 assert(!rtex->fmask.size);
565 assert(!rtex->dcc_offset);
566 assert(!rtex->is_depth);
567 }
568
569 r600_texture_reference(&new_tex, NULL);
570
571 p_atomic_inc(&rctx->screen->dirty_tex_counter);
572 }
573
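/* Export a resource for sharing with other processes or APIs. Suballocated
 * storage is first moved into a standalone buffer, DCC is disabled for
 * external writers, fast clears are eliminated when no explicit flush is
 * promised, and the BO metadata is updated before the handle is returned.
 */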
574 static boolean r600_texture_get_handle(struct pipe_screen* screen,
575 struct pipe_context *ctx,
576 struct pipe_resource *resource,
577 struct winsys_handle *whandle,
578 unsigned usage)
579 {
580 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
581 struct r600_common_context *rctx;
582 struct r600_resource *res = (struct r600_resource*)resource;
583 struct r600_texture *rtex = (struct r600_texture*)resource;
584 struct radeon_bo_metadata metadata;
585 bool update_metadata = false;
586 unsigned stride, offset, slice_size;
587
588 ctx = threaded_context_unwrap_sync(ctx);
589 rctx = (struct r600_common_context*)(ctx ? ctx : rscreen->aux_context);
590
591 if (resource->target != PIPE_BUFFER) {
592 /* This is not supported now, but it might be required for OpenCL
593 * interop in the future.
594 */
595 if (resource->nr_samples > 1 || rtex->is_depth)
596 return false;
597
598 /* Move a suballocated texture into a non-suballocated allocation. */
599 if (rscreen->ws->buffer_is_suballocated(res->buf) ||
600 rtex->surface.tile_swizzle) {
601 assert(!res->b.is_shared);
602 r600_reallocate_texture_inplace(rctx, rtex,
603 PIPE_BIND_SHARED, false);
604 rctx->b.flush(&rctx->b, NULL, 0);
605 assert(res->b.b.bind & PIPE_BIND_SHARED);
606 assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
607 assert(rtex->surface.tile_swizzle == 0);
608 }
609
610 /* Since shader image stores don't support DCC on VI,
611 * disable it for external clients that want write
612 * access.
613 */
614 if (usage & PIPE_HANDLE_USAGE_WRITE && rtex->dcc_offset) {
615 if (r600_texture_disable_dcc(rctx, rtex))
616 update_metadata = true;
617 }
618
619 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH) &&
620 (rtex->cmask.size || rtex->dcc_offset)) {
621 /* Eliminate fast clear (both CMASK and DCC) */
622 r600_eliminate_fast_color_clear(rctx, rtex);
623
624 /* Disable CMASK if flush_resource isn't going
625 * to be called.
626 */
627 if (rtex->cmask.size)
628 r600_texture_discard_cmask(rscreen, rtex);
629 }
630
631 /* Set metadata. */
632 if (!res->b.is_shared || update_metadata) {
633 r600_texture_init_metadata(rscreen, rtex, &metadata);
634 if (rscreen->query_opaque_metadata)
635 rscreen->query_opaque_metadata(rscreen, rtex,
636 &metadata);
637
638 rscreen->ws->buffer_set_metadata(res->buf, &metadata);
639 }
640
641 if (rscreen->chip_class >= GFX9) {
642 offset = rtex->surface.u.gfx9.surf_offset;
643 stride = rtex->surface.u.gfx9.surf_pitch *
644 rtex->surface.bpe;
645 slice_size = rtex->surface.u.gfx9.surf_slice_size;
646 } else {
647 offset = rtex->surface.u.legacy.level[0].offset;
648 stride = rtex->surface.u.legacy.level[0].nblk_x *
649 rtex->surface.bpe;
650 slice_size = rtex->surface.u.legacy.level[0].slice_size;
651 }
652 } else {
653 /* Move a suballocated buffer into a non-suballocated allocation. */
654 if (rscreen->ws->buffer_is_suballocated(res->buf)) {
655 assert(!res->b.is_shared);
656
657 /* Allocate a new buffer with PIPE_BIND_SHARED. */
658 struct pipe_resource templ = res->b.b;
659 templ.bind |= PIPE_BIND_SHARED;
660
661 struct pipe_resource *newb =
662 screen->resource_create(screen, &templ);
663 if (!newb)
664 return false;
665
666 /* Copy the old buffer contents to the new one. */
667 struct pipe_box box;
668 u_box_1d(0, newb->width0, &box);
669 rctx->b.resource_copy_region(&rctx->b, newb, 0, 0, 0, 0,
670 &res->b.b, 0, &box);
671 /* Move the new buffer storage to the old pipe_resource. */
672 r600_replace_buffer_storage(&rctx->b, &res->b.b, newb);
673 pipe_resource_reference(&newb, NULL);
674
675 assert(res->b.b.bind & PIPE_BIND_SHARED);
676 assert(res->flags & RADEON_FLAG_NO_SUBALLOC);
677 }
678
679 /* Buffers */
680 offset = 0;
681 stride = 0;
682 slice_size = 0;
683 }
684
685 if (res->b.is_shared) {
686 /* USAGE_EXPLICIT_FLUSH must be cleared if at least one user
687 * doesn't set it.
688 */
689 res->external_usage |= usage & ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
690 if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
691 res->external_usage &= ~PIPE_HANDLE_USAGE_EXPLICIT_FLUSH;
692 } else {
693 res->b.is_shared = true;
694 res->external_usage = usage;
695 }
696
697 return rscreen->ws->buffer_get_handle(res->buf, stride, offset,
698 slice_size, whandle);
699 }
700
701 static void r600_texture_destroy(struct pipe_screen *screen,
702 struct pipe_resource *ptex)
703 {
704 struct r600_texture *rtex = (struct r600_texture*)ptex;
705 struct r600_resource *resource = &rtex->resource;
706
707 r600_texture_reference(&rtex->flushed_depth_texture, NULL);
708
709 if (rtex->cmask_buffer != &rtex->resource) {
710 r600_resource_reference(&rtex->cmask_buffer, NULL);
711 }
712 pb_reference(&resource->buf, NULL);
713 r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
714 r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
715 FREE(rtex);
716 }
717
718 static const struct u_resource_vtbl r600_texture_vtbl;
719
720 /* The number of samples can be specified independently of the texture. */
721 void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
722 struct r600_texture *rtex,
723 unsigned nr_samples,
724 struct r600_fmask_info *out)
725 {
726 /* FMASK is allocated like an ordinary texture. */
727 struct pipe_resource templ = rtex->resource.b.b;
728 struct radeon_surf fmask = {};
729 unsigned flags, bpe;
730
731 memset(out, 0, sizeof(*out));
732
733 if (rscreen->chip_class >= GFX9) {
734 out->alignment = rtex->surface.u.gfx9.fmask_alignment;
735 out->size = rtex->surface.u.gfx9.fmask_size;
736 return;
737 }
738
739 templ.nr_samples = 1;
740 flags = rtex->surface.flags | RADEON_SURF_FMASK;
741
742 if (rscreen->chip_class <= CAYMAN) {
743 /* Use the same parameters and tile mode. */
744 fmask.u.legacy.bankw = rtex->surface.u.legacy.bankw;
745 fmask.u.legacy.bankh = rtex->surface.u.legacy.bankh;
746 fmask.u.legacy.mtilea = rtex->surface.u.legacy.mtilea;
747 fmask.u.legacy.tile_split = rtex->surface.u.legacy.tile_split;
748
749 if (nr_samples <= 4)
750 fmask.u.legacy.bankh = 4;
751 }
752
753 switch (nr_samples) {
754 case 2:
755 case 4:
756 bpe = 1;
757 break;
758 case 8:
759 bpe = 4;
760 break;
761 default:
762 R600_ERR("Invalid sample count for FMASK allocation.\n");
763 return;
764 }
765
766 /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
767 * This can be fixed by writing a separate FMASK allocator specifically
768 * for R600-R700 asics. */
769 if (rscreen->chip_class <= R700) {
770 bpe *= 2;
771 }
772
773 if (rscreen->ws->surface_init(rscreen->ws, &templ, flags, bpe,
774 RADEON_SURF_MODE_2D, &fmask)) {
775 R600_ERR("Got error in surface_init while allocating FMASK.\n");
776 return;
777 }
778
779 assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
780
781 out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
782 if (out->slice_tile_max)
783 out->slice_tile_max -= 1;
784
785 out->tile_mode_index = fmask.u.legacy.tiling_index[0];
786 out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
787 out->bank_height = fmask.u.legacy.bankh;
788 out->tile_swizzle = fmask.tile_swizzle;
789 out->alignment = MAX2(256, fmask.surf_alignment);
790 out->size = fmask.surf_size;
791 }
792
793 static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
794 struct r600_texture *rtex)
795 {
796 r600_texture_get_fmask_info(rscreen, rtex,
797 rtex->resource.b.b.nr_samples, &rtex->fmask);
798
799 rtex->fmask.offset = align64(rtex->size, rtex->fmask.alignment);
800 rtex->size = rtex->fmask.offset + rtex->fmask.size;
801 }
802
803 void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
804 struct r600_texture *rtex,
805 struct r600_cmask_info *out)
806 {
807 unsigned cmask_tile_width = 8;
808 unsigned cmask_tile_height = 8;
809 unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
810 unsigned element_bits = 4;
811 unsigned cmask_cache_bits = 1024;
812 unsigned num_pipes = rscreen->info.num_tile_pipes;
813 unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
814
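	/* One CMASK cache line holds (cmask_cache_bits / element_bits) elements
	 * per pipe; derive an approximately square macro tile covering that many
	 * 8x8 pixel tiles.
	 */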
815 unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
816 unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
817 unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
818 unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
819 unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;
820
821 unsigned pitch_elements = align(rtex->resource.b.b.width0, macro_tile_width);
822 unsigned height = align(rtex->resource.b.b.height0, macro_tile_height);
823
824 unsigned base_align = num_pipes * pipe_interleave_bytes;
825 unsigned slice_bytes =
826 ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;
827
828 assert(macro_tile_width % 128 == 0);
829 assert(macro_tile_height % 128 == 0);
830
831 out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
832 out->alignment = MAX2(256, base_align);
833 out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
834 align(slice_bytes, base_align);
835 }
836
837 static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
838 struct r600_texture *rtex,
839 struct r600_cmask_info *out)
840 {
841 unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
842 unsigned num_pipes = rscreen->info.num_tile_pipes;
843 unsigned cl_width, cl_height;
844
845 if (rscreen->chip_class >= GFX9) {
846 out->alignment = rtex->surface.u.gfx9.cmask_alignment;
847 out->size = rtex->surface.u.gfx9.cmask_size;
848 return;
849 }
850
851 switch (num_pipes) {
852 case 2:
853 cl_width = 32;
854 cl_height = 16;
855 break;
856 case 4:
857 cl_width = 32;
858 cl_height = 32;
859 break;
860 case 8:
861 cl_width = 64;
862 cl_height = 32;
863 break;
864 case 16: /* Hawaii */
865 cl_width = 64;
866 cl_height = 64;
867 break;
868 default:
869 assert(0);
870 return;
871 }
872
873 unsigned base_align = num_pipes * pipe_interleave_bytes;
874
875 unsigned width = align(rtex->resource.b.b.width0, cl_width*8);
876 unsigned height = align(rtex->resource.b.b.height0, cl_height*8);
877 unsigned slice_elements = (width * height) / (8*8);
878
879 /* Each element of CMASK is a nibble. */
880 unsigned slice_bytes = slice_elements / 2;
881
882 out->slice_tile_max = (width * height) / (128*128);
883 if (out->slice_tile_max)
884 out->slice_tile_max -= 1;
885
886 out->alignment = MAX2(256, base_align);
887 out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
888 align(slice_bytes, base_align);
889 }
890
891 static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
892 struct r600_texture *rtex)
893 {
894 if (rscreen->chip_class >= SI) {
895 si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
896 } else {
897 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
898 }
899
900 rtex->cmask.offset = align64(rtex->size, rtex->cmask.alignment);
901 rtex->size = rtex->cmask.offset + rtex->cmask.size;
902
903 if (rscreen->chip_class >= SI)
904 rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
905 else
906 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
907 }
908
909 static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
910 struct r600_texture *rtex)
911 {
912 if (rtex->cmask_buffer)
913 return;
914
915 assert(rtex->cmask.size == 0);
916
917 if (rscreen->chip_class >= SI) {
918 si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
919 } else {
920 r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
921 }
922
923 rtex->cmask_buffer = (struct r600_resource *)
924 r600_aligned_buffer_create(&rscreen->b,
925 R600_RESOURCE_FLAG_UNMAPPABLE,
926 PIPE_USAGE_DEFAULT,
927 rtex->cmask.size,
928 rtex->cmask.alignment);
929 if (rtex->cmask_buffer == NULL) {
930 rtex->cmask.size = 0;
931 return;
932 }
933
934 /* update colorbuffer state bits */
935 rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;
936
937 if (rscreen->chip_class >= SI)
938 rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
939 else
940 rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
941
942 p_atomic_inc(&rscreen->compressed_colortex_counter);
943 }
944
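/* Compute the size and alignment of the legacy (pre-GFX9) HTILE buffer.
 * htile_size is left at 0 when HTILE can't be used, e.g. on old kernels or
 * on R6xx surfaces that exceed the hardware limit.
 */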
945 static void r600_texture_get_htile_size(struct r600_common_screen *rscreen,
946 struct r600_texture *rtex)
947 {
948 unsigned cl_width, cl_height, width, height;
949 unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
950 unsigned num_pipes = rscreen->info.num_tile_pipes;
951
952 assert(rscreen->chip_class <= VI);
953
954 rtex->surface.htile_size = 0;
955
956 if (rscreen->chip_class <= EVERGREEN &&
957 rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
958 return;
959
960 /* HW bug on R6xx. */
961 if (rscreen->chip_class == R600 &&
962 (rtex->resource.b.b.width0 > 7680 ||
963 rtex->resource.b.b.height0 > 7680))
964 return;
965
966 /* HTILE is broken with 1D tiling on old kernels and CIK. */
967 if (rscreen->chip_class >= CIK &&
968 rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
969 rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
970 return;
971
972 /* Overalign HTILE on P2 configs to work around GPU hangs in
973 * piglit/depthstencil-render-miplevels 585.
974 *
975 * This has been confirmed to help Kabini & Stoney, where the hangs
976 * are always reproducible. I think I have seen the test hang
977 * on Carrizo too, though it was very rare there.
978 */
979 if (rscreen->chip_class >= CIK && num_pipes < 4)
980 num_pipes = 4;
981
982 switch (num_pipes) {
983 case 1:
984 cl_width = 32;
985 cl_height = 16;
986 break;
987 case 2:
988 cl_width = 32;
989 cl_height = 32;
990 break;
991 case 4:
992 cl_width = 64;
993 cl_height = 32;
994 break;
995 case 8:
996 cl_width = 64;
997 cl_height = 64;
998 break;
999 case 16:
1000 cl_width = 128;
1001 cl_height = 64;
1002 break;
1003 default:
1004 assert(0);
1005 return;
1006 }
1007
1008 width = align(rtex->resource.b.b.width0, cl_width * 8);
1009 height = align(rtex->resource.b.b.height0, cl_height * 8);
1010
1011 slice_elements = (width * height) / (8 * 8);
1012 slice_bytes = slice_elements * 4;
1013
1014 pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
1015 base_align = num_pipes * pipe_interleave_bytes;
1016
1017 rtex->surface.htile_alignment = base_align;
1018 rtex->surface.htile_size =
1019 (util_max_layer(&rtex->resource.b.b, 0) + 1) *
1020 align(slice_bytes, base_align);
1021 }
1022
1023 static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
1024 struct r600_texture *rtex)
1025 {
1026 if (rscreen->chip_class <= VI && !rtex->tc_compatible_htile)
1027 r600_texture_get_htile_size(rscreen, rtex);
1028
1029 if (!rtex->surface.htile_size)
1030 return;
1031
1032 rtex->htile_offset = align(rtex->size, rtex->surface.htile_alignment);
1033 rtex->size = rtex->htile_offset + rtex->surface.htile_size;
1034 }
1035
1036 void r600_print_texture_info(struct r600_common_screen *rscreen,
1037 struct r600_texture *rtex, struct u_log_context *log)
1038 {
1039 int i;
1040
1041 /* Common parameters. */
1042 u_log_printf(log, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
1043 "blk_h=%u, array_size=%u, last_level=%u, "
1044 "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
1045 rtex->resource.b.b.width0, rtex->resource.b.b.height0,
1046 rtex->resource.b.b.depth0, rtex->surface.blk_w,
1047 rtex->surface.blk_h,
1048 rtex->resource.b.b.array_size, rtex->resource.b.b.last_level,
1049 rtex->surface.bpe, rtex->resource.b.b.nr_samples,
1050 rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));
1051
1052 if (rscreen->chip_class >= GFX9) {
1053 u_log_printf(log, " Surf: size=%"PRIu64", slice_size=%"PRIu64", "
1054 "alignment=%u, swmode=%u, epitch=%u, pitch=%u\n",
1055 rtex->surface.surf_size,
1056 rtex->surface.u.gfx9.surf_slice_size,
1057 rtex->surface.surf_alignment,
1058 rtex->surface.u.gfx9.surf.swizzle_mode,
1059 rtex->surface.u.gfx9.surf.epitch,
1060 rtex->surface.u.gfx9.surf_pitch);
1061
1062 if (rtex->fmask.size) {
1063 u_log_printf(log, " FMASK: offset=%"PRIu64", size=%"PRIu64", "
1064 "alignment=%u, swmode=%u, epitch=%u\n",
1065 rtex->fmask.offset,
1066 rtex->surface.u.gfx9.fmask_size,
1067 rtex->surface.u.gfx9.fmask_alignment,
1068 rtex->surface.u.gfx9.fmask.swizzle_mode,
1069 rtex->surface.u.gfx9.fmask.epitch);
1070 }
1071
1072 if (rtex->cmask.size) {
1073 u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", "
1074 "alignment=%u, rb_aligned=%u, pipe_aligned=%u\n",
1075 rtex->cmask.offset,
1076 rtex->surface.u.gfx9.cmask_size,
1077 rtex->surface.u.gfx9.cmask_alignment,
1078 rtex->surface.u.gfx9.cmask.rb_aligned,
1079 rtex->surface.u.gfx9.cmask.pipe_aligned);
1080 }
1081
1082 if (rtex->htile_offset) {
1083 u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
1084 "rb_aligned=%u, pipe_aligned=%u\n",
1085 rtex->htile_offset,
1086 rtex->surface.htile_size,
1087 rtex->surface.htile_alignment,
1088 rtex->surface.u.gfx9.htile.rb_aligned,
1089 rtex->surface.u.gfx9.htile.pipe_aligned);
1090 }
1091
1092 if (rtex->dcc_offset) {
1093 u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", "
1094 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
1095 rtex->dcc_offset, rtex->surface.dcc_size,
1096 rtex->surface.dcc_alignment,
1097 rtex->surface.u.gfx9.dcc_pitch_max,
1098 rtex->surface.num_dcc_levels);
1099 }
1100
1101 if (rtex->surface.u.gfx9.stencil_offset) {
1102 u_log_printf(log, " Stencil: offset=%"PRIu64", swmode=%u, epitch=%u\n",
1103 rtex->surface.u.gfx9.stencil_offset,
1104 rtex->surface.u.gfx9.stencil.swizzle_mode,
1105 rtex->surface.u.gfx9.stencil.epitch);
1106 }
1107 return;
1108 }
1109
1110 u_log_printf(log, " Layout: size=%"PRIu64", alignment=%u, bankw=%u, "
1111 "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
1112 rtex->surface.surf_size, rtex->surface.surf_alignment, rtex->surface.u.legacy.bankw,
1113 rtex->surface.u.legacy.bankh, rtex->surface.u.legacy.num_banks, rtex->surface.u.legacy.mtilea,
1114 rtex->surface.u.legacy.tile_split, rtex->surface.u.legacy.pipe_config,
1115 (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);
1116
1117 if (rtex->fmask.size)
1118 u_log_printf(log, " FMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, pitch_in_pixels=%u, "
1119 "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
1120 rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
1121 rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
1122 rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);
1123
1124 if (rtex->cmask.size)
1125 u_log_printf(log, " CMask: offset=%"PRIu64", size=%"PRIu64", alignment=%u, "
1126 "slice_tile_max=%u\n",
1127 rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
1128 rtex->cmask.slice_tile_max);
1129
1130 if (rtex->htile_offset)
1131 u_log_printf(log, " HTile: offset=%"PRIu64", size=%"PRIu64", "
1132 "alignment=%u, TC_compatible = %u\n",
1133 rtex->htile_offset, rtex->surface.htile_size,
1134 rtex->surface.htile_alignment,
1135 rtex->tc_compatible_htile);
1136
1137 if (rtex->dcc_offset) {
1138 u_log_printf(log, " DCC: offset=%"PRIu64", size=%"PRIu64", alignment=%u\n",
1139 rtex->dcc_offset, rtex->surface.dcc_size,
1140 rtex->surface.dcc_alignment);
1141 for (i = 0; i <= rtex->resource.b.b.last_level; i++)
1142 u_log_printf(log, " DCCLevel[%i]: enabled=%u, offset=%"PRIu64", "
1143 "fast_clear_size=%"PRIu64"\n",
1144 i, i < rtex->surface.num_dcc_levels,
1145 rtex->surface.u.legacy.level[i].dcc_offset,
1146 rtex->surface.u.legacy.level[i].dcc_fast_clear_size);
1147 }
1148
1149 for (i = 0; i <= rtex->resource.b.b.last_level; i++)
1150 u_log_printf(log, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
1151 "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
1152 "mode=%u, tiling_index = %u\n",
1153 i, rtex->surface.u.legacy.level[i].offset,
1154 rtex->surface.u.legacy.level[i].slice_size,
1155 u_minify(rtex->resource.b.b.width0, i),
1156 u_minify(rtex->resource.b.b.height0, i),
1157 u_minify(rtex->resource.b.b.depth0, i),
1158 rtex->surface.u.legacy.level[i].nblk_x,
1159 rtex->surface.u.legacy.level[i].nblk_y,
1160 rtex->surface.u.legacy.level[i].mode,
1161 rtex->surface.u.legacy.tiling_index[i]);
1162
1163 if (rtex->surface.has_stencil) {
1164 u_log_printf(log, " StencilLayout: tilesplit=%u\n",
1165 rtex->surface.u.legacy.stencil_tile_split);
1166 for (i = 0; i <= rtex->resource.b.b.last_level; i++) {
1167 u_log_printf(log, " StencilLevel[%i]: offset=%"PRIu64", "
1168 "slice_size=%"PRIu64", npix_x=%u, "
1169 "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
1170 "mode=%u, tiling_index = %u\n",
1171 i, rtex->surface.u.legacy.stencil_level[i].offset,
1172 rtex->surface.u.legacy.stencil_level[i].slice_size,
1173 u_minify(rtex->resource.b.b.width0, i),
1174 u_minify(rtex->resource.b.b.height0, i),
1175 u_minify(rtex->resource.b.b.depth0, i),
1176 rtex->surface.u.legacy.stencil_level[i].nblk_x,
1177 rtex->surface.u.legacy.stencil_level[i].nblk_y,
1178 rtex->surface.u.legacy.stencil_level[i].mode,
1179 rtex->surface.u.legacy.stencil_tiling_index[i]);
1180 }
1181 }
1182 }
1183
1184 /* Common processing for r600_texture_create and r600_texture_from_handle */
1185 static struct r600_texture *
1186 r600_texture_create_object(struct pipe_screen *screen,
1187 const struct pipe_resource *base,
1188 struct pb_buffer *buf,
1189 struct radeon_surf *surface)
1190 {
1191 struct r600_texture *rtex;
1192 struct r600_resource *resource;
1193 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1194
1195 rtex = CALLOC_STRUCT(r600_texture);
1196 if (!rtex)
1197 return NULL;
1198
1199 resource = &rtex->resource;
1200 resource->b.b = *base;
1201 resource->b.b.next = NULL;
1202 resource->b.vtbl = &r600_texture_vtbl;
1203 pipe_reference_init(&resource->b.b.reference, 1);
1204 resource->b.b.screen = screen;
1205
1206 /* don't include stencil-only formats which we don't support for rendering */
1207 rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));
1208
1209 rtex->surface = *surface;
1210 rtex->size = rtex->surface.surf_size;
1211
1212 rtex->tc_compatible_htile = rtex->surface.htile_size != 0 &&
1213 (rtex->surface.flags &
1214 RADEON_SURF_TC_COMPATIBLE_HTILE);
1215
1216 /* TC-compatible HTILE:
1217 * - VI only supports Z32_FLOAT.
1218 * - GFX9 only supports Z32_FLOAT and Z16_UNORM. */
1219 if (rtex->tc_compatible_htile) {
1220 if (rscreen->chip_class >= GFX9 &&
1221 base->format == PIPE_FORMAT_Z16_UNORM)
1222 rtex->db_render_format = base->format;
1223 else
1224 rtex->db_render_format = PIPE_FORMAT_Z32_FLOAT;
1225 } else {
1226 rtex->db_render_format = base->format;
1227 }
1228
1229 /* Tiled depth textures utilize the non-displayable tile order.
1230 * This must be done after r600_setup_surface.
1231 * Applies to R600-Cayman. */
1232 rtex->non_disp_tiling = rtex->is_depth && rtex->surface.u.legacy.level[0].mode >= RADEON_SURF_MODE_1D;
1233 /* Applies to GCN. */
1234 rtex->last_msaa_resolve_target_micro_mode = rtex->surface.micro_tile_mode;
1235
1236 /* Disable separate DCC at the beginning. DRI2 doesn't reuse buffers
1237 * between frames, so the only thing that can enable separate DCC
1238 * with DRI2 is multiple slow clears within a frame.
1239 */
1240 rtex->ps_draw_ratio = 0;
1241
1242 if (rtex->is_depth) {
1243 if (base->flags & (R600_RESOURCE_FLAG_TRANSFER |
1244 R600_RESOURCE_FLAG_FLUSHED_DEPTH) ||
1245 rscreen->chip_class >= EVERGREEN) {
1246 if (rscreen->chip_class >= GFX9) {
1247 rtex->can_sample_z = true;
1248 rtex->can_sample_s = true;
1249 } else {
1250 rtex->can_sample_z = !rtex->surface.u.legacy.depth_adjusted;
1251 rtex->can_sample_s = !rtex->surface.u.legacy.stencil_adjusted;
1252 }
1253 } else {
1254 if (rtex->resource.b.b.nr_samples <= 1 &&
1255 (rtex->resource.b.b.format == PIPE_FORMAT_Z16_UNORM ||
1256 rtex->resource.b.b.format == PIPE_FORMAT_Z32_FLOAT))
1257 rtex->can_sample_z = true;
1258 }
1259
1260 if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
1261 R600_RESOURCE_FLAG_FLUSHED_DEPTH))) {
1262 rtex->db_compatible = true;
1263
1264 if (!(rscreen->debug_flags & DBG_NO_HYPERZ))
1265 r600_texture_allocate_htile(rscreen, rtex);
1266 }
1267 } else {
1268 if (base->nr_samples > 1) {
1269 if (!buf) {
1270 r600_texture_allocate_fmask(rscreen, rtex);
1271 r600_texture_allocate_cmask(rscreen, rtex);
1272 rtex->cmask_buffer = &rtex->resource;
1273 }
1274 if (!rtex->fmask.size || !rtex->cmask.size) {
1275 FREE(rtex);
1276 return NULL;
1277 }
1278 }
1279
1280 /* Shared textures must always set up DCC here.
1281 * If it's not present, it will be disabled by
1282 * apply_opaque_metadata later.
1283 */
1284 if (rtex->surface.dcc_size &&
1285 (buf || !(rscreen->debug_flags & DBG_NO_DCC)) &&
1286 !(rtex->surface.flags & RADEON_SURF_SCANOUT)) {
1287 /* Reserve space for the DCC buffer. */
1288 rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
1289 rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
1290 }
1291 }
1292
1293 /* Now create the backing buffer. */
1294 if (!buf) {
1295 r600_init_resource_fields(rscreen, resource, rtex->size,
1296 rtex->surface.surf_alignment);
1297
1298 /* Displayable surfaces are not suballocated. */
1299 if (resource->b.b.bind & PIPE_BIND_SCANOUT)
1300 resource->flags |= RADEON_FLAG_NO_SUBALLOC;
1301
1302 if (!r600_alloc_resource(rscreen, resource)) {
1303 FREE(rtex);
1304 return NULL;
1305 }
1306 } else {
1307 resource->buf = buf;
1308 resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
1309 resource->bo_size = buf->size;
1310 resource->bo_alignment = buf->alignment;
1311 resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
1312 if (resource->domains & RADEON_DOMAIN_VRAM)
1313 resource->vram_usage = buf->size;
1314 else if (resource->domains & RADEON_DOMAIN_GTT)
1315 resource->gart_usage = buf->size;
1316 }
1317
1318 if (rtex->cmask.size) {
1319 /* Initialize the cmask to 0xCC (= compressed state). */
1320 r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
1321 rtex->cmask.offset, rtex->cmask.size,
1322 0xCCCCCCCC);
1323 }
1324 if (rtex->htile_offset) {
1325 uint32_t clear_value = 0;
1326
1327 if (rscreen->chip_class >= GFX9 || rtex->tc_compatible_htile)
1328 clear_value = 0x0000030F;
1329
1330 r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
1331 rtex->htile_offset,
1332 rtex->surface.htile_size,
1333 clear_value);
1334 }
1335
1336 /* Initialize DCC only if the texture is not being imported. */
1337 if (!buf && rtex->dcc_offset) {
1338 r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
1339 rtex->dcc_offset,
1340 rtex->surface.dcc_size,
1341 0xFFFFFFFF);
1342 }
1343
1344 /* Initialize the CMASK base register value. */
1345 rtex->cmask.base_address_reg =
1346 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
1347
1348 if (rscreen->debug_flags & DBG_VM) {
1349 fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
1350 rtex->resource.gpu_address,
1351 rtex->resource.gpu_address + rtex->resource.buf->size,
1352 base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
1353 base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
1354 }
1355
1356 if (rscreen->debug_flags & DBG_TEX) {
1357 puts("Texture:");
1358 struct u_log_context log;
1359 u_log_context_init(&log);
1360 r600_print_texture_info(rscreen, rtex, &log);
1361 u_log_new_page_print(&log, stdout);
1362 fflush(stdout);
1363 u_log_context_destroy(&log);
1364 }
1365
1366 return rtex;
1367 }
1368
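/* Pick the tiling mode (linear aligned, 1D, or 2D) for a new resource based
 * on its template: MSAA and depth/stencil surfaces force 2D, transfers and
 * staging resources force linear, and small textures prefer 1D.
 */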
1369 static enum radeon_surf_mode
1370 r600_choose_tiling(struct r600_common_screen *rscreen,
1371 const struct pipe_resource *templ)
1372 {
1373 const struct util_format_description *desc = util_format_description(templ->format);
1374 bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;
1375 bool is_depth_stencil = util_format_is_depth_or_stencil(templ->format) &&
1376 !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
1377
1378 /* MSAA resources must be 2D tiled. */
1379 if (templ->nr_samples > 1)
1380 return RADEON_SURF_MODE_2D;
1381
1382 /* Transfer resources should be linear. */
1383 if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
1384 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1385
1386 /* Avoid Z/S decompress blits by forcing TC-compatible HTILE on VI,
1387 * which requires 2D tiling.
1388 */
1389 if (rscreen->chip_class == VI &&
1390 is_depth_stencil &&
1391 (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY))
1392 return RADEON_SURF_MODE_2D;
1393
1394 /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
1395 if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
1396 (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
1397 (templ->target == PIPE_TEXTURE_2D ||
1398 templ->target == PIPE_TEXTURE_3D))
1399 force_tiling = true;
1400
1401 /* Handle common candidates for the linear mode.
1402 * Compressed textures and DB surfaces must always be tiled.
1403 */
1404 if (!force_tiling &&
1405 !is_depth_stencil &&
1406 !util_format_is_compressed(templ->format)) {
1407 if (rscreen->debug_flags & DBG_NO_TILING)
1408 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1409
1410 /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
1411 if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
1412 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1413
1414 /* Cursors are linear on SI.
1415 * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
1416 if (rscreen->chip_class >= SI &&
1417 (templ->bind & PIPE_BIND_CURSOR))
1418 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1419
1420 if (templ->bind & PIPE_BIND_LINEAR)
1421 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1422
1423 /* Textures with a very small height are recommended to be linear. */
1424 if (templ->target == PIPE_TEXTURE_1D ||
1425 templ->target == PIPE_TEXTURE_1D_ARRAY ||
1426 /* Only very thin and long 2D textures should benefit from
1427 * linear_aligned. */
1428 (templ->width0 > 8 && templ->height0 <= 2))
1429 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1430
1431 /* Textures likely to be mapped often. */
1432 if (templ->usage == PIPE_USAGE_STAGING ||
1433 templ->usage == PIPE_USAGE_STREAM)
1434 return RADEON_SURF_MODE_LINEAR_ALIGNED;
1435 }
1436
1437 /* Make small textures 1D tiled. */
1438 if (templ->width0 <= 16 || templ->height0 <= 16 ||
1439 (rscreen->debug_flags & DBG_NO_2D_TILING))
1440 return RADEON_SURF_MODE_1D;
1441
1442 /* The allocator will switch to 1D if needed. */
1443 return RADEON_SURF_MODE_2D;
1444 }
1445
1446 struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
1447 const struct pipe_resource *templ)
1448 {
1449 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1450 struct radeon_surf surface = {0};
1451 bool is_flushed_depth = templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH;
1452 bool tc_compatible_htile =
1453 rscreen->chip_class >= VI &&
1454 (templ->flags & PIPE_RESOURCE_FLAG_TEXTURING_MORE_LIKELY) &&
1455 !(rscreen->debug_flags & DBG_NO_HYPERZ) &&
1456 !is_flushed_depth &&
1457 templ->nr_samples <= 1 && /* TC-compat HTILE is less efficient with MSAA */
1458 util_format_is_depth_or_stencil(templ->format);
1459
1460 int r;
1461
1462 r = r600_init_surface(rscreen, &surface, templ,
1463 r600_choose_tiling(rscreen, templ), 0, 0,
1464 false, false, is_flushed_depth,
1465 tc_compatible_htile);
1466 if (r) {
1467 return NULL;
1468 }
1469
1470 return (struct pipe_resource *)
1471 r600_texture_create_object(screen, templ, NULL, &surface);
1472 }
1473
1474 static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
1475 const struct pipe_resource *templ,
1476 struct winsys_handle *whandle,
1477 unsigned usage)
1478 {
1479 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
1480 struct pb_buffer *buf = NULL;
1481 unsigned stride = 0, offset = 0;
1482 enum radeon_surf_mode array_mode;
1483 struct radeon_surf surface = {};
1484 int r;
1485 struct radeon_bo_metadata metadata = {};
1486 struct r600_texture *rtex;
1487 bool is_scanout;
1488
1489 /* Support only 2D textures without mipmaps */
1490 if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
1491 templ->depth0 != 1 || templ->last_level != 0)
1492 return NULL;
1493
1494 buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride, &offset);
1495 if (!buf)
1496 return NULL;
1497
1498 rscreen->ws->buffer_get_metadata(buf, &metadata);
1499 r600_surface_import_metadata(rscreen, &surface, &metadata,
1500 &array_mode, &is_scanout);
1501
1502 r = r600_init_surface(rscreen, &surface, templ, array_mode, stride,
1503 offset, true, is_scanout, false, false);
1504 if (r) {
1505 return NULL;
1506 }
1507
1508 rtex = r600_texture_create_object(screen, templ, buf, &surface);
1509 if (!rtex)
1510 return NULL;
1511
1512 rtex->resource.b.is_shared = true;
1513 rtex->resource.external_usage = usage;
1514
1515 if (rscreen->apply_opaque_metadata)
1516 rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
1517
1518 assert(rtex->surface.tile_swizzle == 0);
1519 return &rtex->resource.b.b;
1520 }
1521
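/* Allocate the texture that receives the decompressed (flushed) copy of a
 * depth buffer so it can be sampled. The format may be reduced to the Z or
 * S plane alone when only one of them is samplable, to save memory and
 * bandwidth.
 */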
1522 bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
1523 struct pipe_resource *texture,
1524 struct r600_texture **staging)
1525 {
1526 struct r600_texture *rtex = (struct r600_texture*)texture;
1527 struct pipe_resource resource;
1528 struct r600_texture **flushed_depth_texture = staging ?
1529 staging : &rtex->flushed_depth_texture;
1530 enum pipe_format pipe_format = texture->format;
1531
1532 if (!staging) {
1533 if (rtex->flushed_depth_texture)
1534 return true; /* it's ready */
1535
1536 if (!rtex->can_sample_z && rtex->can_sample_s) {
1537 switch (pipe_format) {
1538 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1539 /* Save memory by not allocating the S plane. */
1540 pipe_format = PIPE_FORMAT_Z32_FLOAT;
1541 break;
1542 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
1543 case PIPE_FORMAT_S8_UINT_Z24_UNORM:
1544 /* Save memory bandwidth by not copying the
1545 * stencil part during flush.
1546 *
1547 * This potentially increases memory bandwidth
1548 * if an application uses both Z and S texturing
1549 * simultaneously (a flushed Z24S8 texture
1550 * would be stored compactly), but how often
1551 * does that really happen?
1552 */
1553 pipe_format = PIPE_FORMAT_Z24X8_UNORM;
1554 break;
1555 default:;
1556 }
1557 } else if (!rtex->can_sample_s && rtex->can_sample_z) {
1558 assert(util_format_has_stencil(util_format_description(pipe_format)));
1559
1560 /* DB->CB copies to an 8bpp surface don't work. */
1561 pipe_format = PIPE_FORMAT_X24S8_UINT;
1562 }
1563 }
1564
1565 memset(&resource, 0, sizeof(resource));
1566 resource.target = texture->target;
1567 resource.format = pipe_format;
1568 resource.width0 = texture->width0;
1569 resource.height0 = texture->height0;
1570 resource.depth0 = texture->depth0;
1571 resource.array_size = texture->array_size;
1572 resource.last_level = texture->last_level;
1573 resource.nr_samples = texture->nr_samples;
1574 resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
1575 resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
1576 resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;
1577
1578 if (staging)
1579 resource.flags |= R600_RESOURCE_FLAG_TRANSFER;
1580
1581 *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
1582 if (*flushed_depth_texture == NULL) {
1583 R600_ERR("failed to create temporary texture to hold flushed depth\n");
1584 return false;
1585 }
1586
1587 (*flushed_depth_texture)->non_disp_tiling = false;
1588 return true;
1589 }
1590
1591 /**
1592 * Initialize the pipe_resource descriptor to be of the same size as the box,
1593 * which is supposed to hold a subregion of the texture "orig" at the given
1594 * mipmap level.
1595 */
1596 static void r600_init_temp_resource_from_box(struct pipe_resource *res,
1597 struct pipe_resource *orig,
1598 const struct pipe_box *box,
1599 unsigned level, unsigned flags)
1600 {
1601 memset(res, 0, sizeof(*res));
1602 res->format = orig->format;
1603 res->width0 = box->width;
1604 res->height0 = box->height;
1605 res->depth0 = 1;
1606 res->array_size = 1;
1607 res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
1608 res->flags = flags;
1609
1610 /* We must set the correct texture target and dimensions for a 3D box. */
1611 if (box->depth > 1 && util_max_layer(orig, level) > 0) {
1612 res->target = PIPE_TEXTURE_2D_ARRAY;
1613 res->array_size = box->depth;
1614 } else {
1615 res->target = PIPE_TEXTURE_2D;
1616 }
1617 }
1618
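/* A write-only transfer that covers the whole of a single-level texture may
 * simply reallocate the storage instead of synchronizing with the GPU.
 */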
1619 static bool r600_can_invalidate_texture(struct r600_common_screen *rscreen,
1620 struct r600_texture *rtex,
1621 unsigned transfer_usage,
1622 const struct pipe_box *box)
1623 {
1624 /* r600g doesn't react to dirty_tex_descriptor_counter */
1625 return rscreen->chip_class >= SI &&
1626 !rtex->resource.b.is_shared &&
1627 !(transfer_usage & PIPE_TRANSFER_READ) &&
1628 rtex->resource.b.b.last_level == 0 &&
1629 util_texrange_covers_whole_level(&rtex->resource.b.b, 0,
1630 box->x, box->y, box->z,
1631 box->width, box->height,
1632 box->depth);
1633 }
1634
1635 static void r600_texture_invalidate_storage(struct r600_common_context *rctx,
1636 struct r600_texture *rtex)
1637 {
1638 struct r600_common_screen *rscreen = rctx->screen;
1639
1640 /* There is no point in discarding depth and tiled buffers. */
1641 assert(!rtex->is_depth);
1642 assert(rtex->surface.is_linear);
1643
1644 /* Reallocate the buffer in the same pipe_resource. */
1645 r600_alloc_resource(rscreen, &rtex->resource);
1646
1647 /* Initialize the CMASK base address (needed even without CMASK). */
1648 rtex->cmask.base_address_reg =
1649 (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;
1650
1651 p_atomic_inc(&rscreen->dirty_tex_counter);
1652
1653 rctx->num_alloc_tex_transfer_bytes += rtex->size;
1654 }
1655
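/* Map a texture for CPU access. Depth textures are always decompressed
 * into a flushed-depth staging texture. For color textures, tiled surfaces
 * and reads from VRAM or write-combined GTT go through a linear staging
 * texture, while linear textures that are idle (or whose contents can be
 * invalidated) are mapped directly. */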
1656 static void *r600_texture_transfer_map(struct pipe_context *ctx,
1657 struct pipe_resource *texture,
1658 unsigned level,
1659 unsigned usage,
1660 const struct pipe_box *box,
1661 struct pipe_transfer **ptransfer)
1662 {
1663 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1664 struct r600_texture *rtex = (struct r600_texture*)texture;
1665 struct r600_transfer *trans;
1666 struct r600_resource *buf;
1667 unsigned offset = 0;
1668 char *map;
1669 bool use_staging_texture = false;
1670
1671 assert(!(texture->flags & R600_RESOURCE_FLAG_TRANSFER));
1672 assert(box->width && box->height && box->depth);
1673
1674 /* Depth textures use staging unconditionally. */
1675 if (!rtex->is_depth) {
1676 /* Degrade the tile mode if we get too many transfers on APUs.
1677 * On dGPUs, the staging texture is always faster.
1678 * Only count uploads that are at least 4x4 pixels large.
1679 */
1680 if (!rctx->screen->info.has_dedicated_vram &&
1681 level == 0 &&
1682 box->width >= 4 && box->height >= 4 &&
1683 p_atomic_inc_return(&rtex->num_level0_transfers) == 10) {
1684 bool can_invalidate =
1685 r600_can_invalidate_texture(rctx->screen, rtex,
1686 usage, box);
1687
1688 r600_reallocate_texture_inplace(rctx, rtex,
1689 PIPE_BIND_LINEAR,
1690 can_invalidate);
1691 }
1692
1693 /* Tiled textures need to be converted into a linear texture for CPU
1694 * access. The staging texture is always linear and is placed in GART.
1695 *
1696 * Reading from VRAM or GTT WC is slow, always use the staging
1697 * texture in this case.
1698 *
1699 * Use the staging texture for uploads if the underlying BO
1700 * is busy.
1701 */
1702 if (!rtex->surface.is_linear)
1703 use_staging_texture = true;
1704 else if (usage & PIPE_TRANSFER_READ)
1705 use_staging_texture =
1706 rtex->resource.domains & RADEON_DOMAIN_VRAM ||
1707 rtex->resource.flags & RADEON_FLAG_GTT_WC;
1708 /* Write & linear only: */
1709 else if (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf,
1710 RADEON_USAGE_READWRITE) ||
1711 !rctx->ws->buffer_wait(rtex->resource.buf, 0,
1712 RADEON_USAGE_READWRITE)) {
1713 /* It's busy. */
1714 if (r600_can_invalidate_texture(rctx->screen, rtex,
1715 usage, box))
1716 r600_texture_invalidate_storage(rctx, rtex);
1717 else
1718 use_staging_texture = true;
1719 }
1720 }
1721
1722 trans = CALLOC_STRUCT(r600_transfer);
1723 if (!trans)
1724 return NULL;
1725 pipe_resource_reference(&trans->b.b.resource, texture);
1726 trans->b.b.level = level;
1727 trans->b.b.usage = usage;
1728 trans->b.b.box = *box;
1729
1730 if (rtex->is_depth) {
1731 struct r600_texture *staging_depth;
1732
1733 if (rtex->resource.b.b.nr_samples > 1) {
1734 /* MSAA depth buffers need to be converted to single sample buffers.
1735 *
1736 * Mapping MSAA depth buffers can occur if ReadPixels is called
1737 * with a multisample GLX visual.
1738 *
1739 * First downsample the depth buffer to a temporary texture,
1740 * then decompress the temporary one to staging.
1741 *
1742 * Only the region being mapped is transferred.
1743 */
1744 struct pipe_resource resource;
1745
1746 r600_init_temp_resource_from_box(&resource, texture, box, level, 0);
1747
1748 if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
1749 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1750 FREE(trans);
1751 return NULL;
1752 }
1753
1754 if (usage & PIPE_TRANSFER_READ) {
1755 struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
1756 if (!temp) {
1757 R600_ERR("failed to create a temporary depth texture\n");
1758 FREE(trans);
1759 return NULL;
1760 }
1761
1762 r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
1763 rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
1764 0, 0, 0, box->depth, 0, 0);
1765 pipe_resource_reference(&temp, NULL);
1766 }
1767
1768 /* Just get the strides. */
1769 r600_texture_get_offset(rctx->screen, staging_depth, level, NULL,
1770 &trans->b.b.stride,
1771 &trans->b.b.layer_stride);
1772 } else {
1773 /* XXX: only read back the rectangle being mapped? */
1774 /* XXX: when discard is true, there is no need to read back from the depth texture */
1775 if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
1776 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1777 FREE(trans);
1778 return NULL;
1779 }
1780
1781 rctx->blit_decompress_depth(ctx, rtex, staging_depth,
1782 level, level,
1783 box->z, box->z + box->depth - 1,
1784 0, 0);
1785
1786 offset = r600_texture_get_offset(rctx->screen, staging_depth,
1787 level, box,
1788 &trans->b.b.stride,
1789 &trans->b.b.layer_stride);
1790 }
1791
1792 trans->staging = (struct r600_resource*)staging_depth;
1793 buf = trans->staging;
1794 } else if (use_staging_texture) {
1795 struct pipe_resource resource;
1796 struct r600_texture *staging;
1797
1798 r600_init_temp_resource_from_box(&resource, texture, box, level,
1799 R600_RESOURCE_FLAG_TRANSFER);
1800 resource.usage = (usage & PIPE_TRANSFER_READ) ?
1801 PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
1802
1803 /* Create the temporary texture. */
1804 staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
1805 if (!staging) {
1806 R600_ERR("failed to create temporary texture to hold untiled copy\n");
1807 FREE(trans);
1808 return NULL;
1809 }
1810 trans->staging = &staging->resource;
1811
1812 /* Just get the strides. */
1813 r600_texture_get_offset(rctx->screen, staging, 0, NULL,
1814 &trans->b.b.stride,
1815 &trans->b.b.layer_stride);
1816
1817 if (usage & PIPE_TRANSFER_READ)
1818 r600_copy_to_staging_texture(ctx, trans);
1819 else
1820 usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
1821
1822 buf = trans->staging;
1823 } else {
1824 /* the resource is mapped directly */
1825 offset = r600_texture_get_offset(rctx->screen, rtex, level, box,
1826 &trans->b.b.stride,
1827 &trans->b.b.layer_stride);
1828 buf = &rtex->resource;
1829 }
1830
1831 if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
1832 r600_resource_reference(&trans->staging, NULL);
1833 FREE(trans);
1834 return NULL;
1835 }
1836
1837 *ptransfer = &trans->b.b;
1838 return map + offset;
1839 }
1840
1841 static void r600_texture_transfer_unmap(struct pipe_context *ctx,
1842 struct pipe_transfer* transfer)
1843 {
1844 struct r600_common_context *rctx = (struct r600_common_context*)ctx;
1845 struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
1846 struct pipe_resource *texture = transfer->resource;
1847 struct r600_texture *rtex = (struct r600_texture*)texture;
1848
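/* For write transfers that went through a staging resource, copy the
 * staging contents back into the real texture before releasing it. */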
1849 if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
1850 if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
1851 ctx->resource_copy_region(ctx, texture, transfer->level,
1852 transfer->box.x, transfer->box.y, transfer->box.z,
1853 &rtransfer->staging->b.b, transfer->level,
1854 &transfer->box);
1855 } else {
1856 r600_copy_from_staging_texture(ctx, rtransfer);
1857 }
1858 }
1859
1860 if (rtransfer->staging) {
1861 rctx->num_alloc_tex_transfer_bytes += rtransfer->staging->buf->size;
1862 r600_resource_reference(&rtransfer->staging, NULL);
1863 }
1864
1865 /* Heuristic for {upload, draw, upload, draw, ..}:
1866 *
1867 * Flush the gfx IB if we've allocated too much texture storage.
1868 *
1869 * The idea is that we don't want to build IBs that use too much
1870 * memory and put pressure on the kernel memory manager, and we also
1871 * want to make temporary and invalidated buffers go idle ASAP to
1872 * decrease the total memory usage or make them reusable. The memory
1873 * usage will be slightly higher than given here because of the buffer
1874 * cache in the winsys.
1875 *
1876 * The result is that the kernel memory manager is never a bottleneck.
1877 */
1878 if (rctx->num_alloc_tex_transfer_bytes > rctx->screen->info.gart_size / 4) {
1879 rctx->gfx.flush(rctx, RADEON_FLUSH_ASYNC, NULL);
1880 rctx->num_alloc_tex_transfer_bytes = 0;
1881 }
1882
1883 pipe_resource_reference(&transfer->resource, NULL);
1884 FREE(transfer);
1885 }
1886
1887 static const struct u_resource_vtbl r600_texture_vtbl =
1888 {
1889 NULL, /* get_handle */
1890 r600_texture_destroy, /* resource_destroy */
1891 r600_texture_transfer_map, /* transfer_map */
1892 u_default_transfer_flush_region, /* transfer_flush_region */
1893 r600_texture_transfer_unmap, /* transfer_unmap */
1894 };
1895
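/* Create a pipe_surface. width0/height0 are the (possibly block-size
 * adjusted) level-0 dimensions of the texture, while width/height are the
 * dimensions of the surface at templ->u.tex.level. */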
1896 struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
1897 struct pipe_resource *texture,
1898 const struct pipe_surface *templ,
1899 unsigned width0, unsigned height0,
1900 unsigned width, unsigned height)
1901 {
1902 struct r600_surface *surface = CALLOC_STRUCT(r600_surface);
1903
1904 if (!surface)
1905 return NULL;
1906
1907 assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
1908 assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));
1909
1910 pipe_reference_init(&surface->base.reference, 1);
1911 pipe_resource_reference(&surface->base.texture, texture);
1912 surface->base.context = pipe;
1913 surface->base.format = templ->format;
1914 surface->base.width = width;
1915 surface->base.height = height;
1916 surface->base.u = templ->u;
1917
1918 surface->width0 = width0;
1919 surface->height0 = height0;
1920
1921 return &surface->base;
1922 }
1923
1924 static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
1925 struct pipe_resource *tex,
1926 const struct pipe_surface *templ)
1927 {
1928 unsigned level = templ->u.tex.level;
1929 unsigned width = u_minify(tex->width0, level);
1930 unsigned height = u_minify(tex->height0, level);
1931 unsigned width0 = tex->width0;
1932 unsigned height0 = tex->height0;
1933
1934 if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
1935 const struct util_format_description *tex_desc
1936 = util_format_description(tex->format);
1937 const struct util_format_description *templ_desc
1938 = util_format_description(templ->format);
1939
1940 assert(tex_desc->block.bits == templ_desc->block.bits);
1941
1942 /* Adjust size of surface if and only if the block width or
1943 * height is changed. */
1944 if (tex_desc->block.width != templ_desc->block.width ||
1945 tex_desc->block.height != templ_desc->block.height) {
1946 unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
1947 unsigned nblks_y = util_format_get_nblocksy(tex->format, height);
1948
1949 width = nblks_x * templ_desc->block.width;
1950 height = nblks_y * templ_desc->block.height;
1951
1952 width0 = util_format_get_nblocksx(tex->format, width0);
1953 height0 = util_format_get_nblocksy(tex->format, height0);
1954 }
1955 }
1956
1957 return r600_create_surface_custom(pipe, tex, templ,
1958 width0, height0,
1959 width, height);
1960 }
1961
1962 static void r600_surface_destroy(struct pipe_context *pipe,
1963 struct pipe_surface *surface)
1964 {
1965 struct r600_surface *surf = (struct r600_surface*)surface;
1966 r600_resource_reference(&surf->cb_buffer_fmask, NULL);
1967 r600_resource_reference(&surf->cb_buffer_cmask, NULL);
1968 pipe_resource_reference(&surface->texture, NULL);
1969 FREE(surface);
1970 }
1971
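/* Clear a sub-box of one miplevel by wrapping it in a temporary
 * pipe_surface and reusing the depth-stencil or render-target clear paths;
 * formats the hardware can't render to (e.g. R9G9B9E5_FLOAT) fall back to
 * the software clear from u_surface. */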
1972 static void r600_clear_texture(struct pipe_context *pipe,
1973 struct pipe_resource *tex,
1974 unsigned level,
1975 const struct pipe_box *box,
1976 const void *data)
1977 {
1978 struct pipe_screen *screen = pipe->screen;
1979 struct r600_texture *rtex = (struct r600_texture*)tex;
1980 struct pipe_surface tmpl = {{0}};
1981 struct pipe_surface *sf;
1982 const struct util_format_description *desc =
1983 util_format_description(tex->format);
1984
1985 tmpl.format = tex->format;
1986 tmpl.u.tex.first_layer = box->z;
1987 tmpl.u.tex.last_layer = box->z + box->depth - 1;
1988 tmpl.u.tex.level = level;
1989 sf = pipe->create_surface(pipe, tex, &tmpl);
1990 if (!sf)
1991 return;
1992
1993 if (rtex->is_depth) {
1994 unsigned clear;
1995 float depth;
1996 uint8_t stencil = 0;
1997
1998 /* Depth is always present. */
1999 clear = PIPE_CLEAR_DEPTH;
2000 desc->unpack_z_float(&depth, 0, data, 0, 1, 1);
2001
2002 if (rtex->surface.has_stencil) {
2003 clear |= PIPE_CLEAR_STENCIL;
2004 desc->unpack_s_8uint(&stencil, 0, data, 0, 1, 1);
2005 }
2006
2007 pipe->clear_depth_stencil(pipe, sf, clear, depth, stencil,
2008 box->x, box->y,
2009 box->width, box->height, false);
2010 } else {
2011 union pipe_color_union color;
2012
2013 /* pipe_color_union requires the full vec4 representation. */
2014 if (util_format_is_pure_uint(tex->format))
2015 desc->unpack_rgba_uint(color.ui, 0, data, 0, 1, 1);
2016 else if (util_format_is_pure_sint(tex->format))
2017 desc->unpack_rgba_sint(color.i, 0, data, 0, 1, 1);
2018 else
2019 desc->unpack_rgba_float(color.f, 0, data, 0, 1, 1);
2020
2021 if (screen->is_format_supported(screen, tex->format,
2022 tex->target, 0,
2023 PIPE_BIND_RENDER_TARGET)) {
2024 pipe->clear_render_target(pipe, sf, &color,
2025 box->x, box->y,
2026 box->width, box->height, false);
2027 } else {
2028 /* Software fallback - just for R9G9B9E5_FLOAT */
2029 util_clear_render_target(pipe, sf, &color,
2030 box->x, box->y,
2031 box->width, box->height);
2032 }
2033 }
2034 pipe_surface_reference(&sf, NULL);
2035 }
2036
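/* Translate a format's channel swizzle into the hardware color-buffer
 * component swap (SWAP_STD, SWAP_STD_REV, SWAP_ALT, SWAP_ALT_REV).
 * Returns ~0U for layouts the color buffer can't express. */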
2037 unsigned r600_translate_colorswap(enum pipe_format format, bool do_endian_swap)
2038 {
2039 const struct util_format_description *desc = util_format_description(format);
2040
2041 #define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == PIPE_SWIZZLE_##swz)
2042
2043 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
2044 return V_0280A0_SWAP_STD;
2045
2046 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
2047 return ~0U;
2048
2049 switch (desc->nr_channels) {
2050 case 1:
2051 if (HAS_SWIZZLE(0,X))
2052 return V_0280A0_SWAP_STD; /* X___ */
2053 else if (HAS_SWIZZLE(3,X))
2054 return V_0280A0_SWAP_ALT_REV; /* ___X */
2055 break;
2056 case 2:
2057 if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
2058 (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
2059 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
2060 return V_0280A0_SWAP_STD; /* XY__ */
2061 else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
2062 (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
2063 (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
2064 /* YX__ */
2065 return (do_endian_swap ? V_0280A0_SWAP_STD : V_0280A0_SWAP_STD_REV);
2066 else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
2067 return V_0280A0_SWAP_ALT; /* X__Y */
2068 else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
2069 return V_0280A0_SWAP_ALT_REV; /* Y__X */
2070 break;
2071 case 3:
2072 if (HAS_SWIZZLE(0,X))
2073 return (do_endian_swap ? V_0280A0_SWAP_STD_REV : V_0280A0_SWAP_STD);
2074 else if (HAS_SWIZZLE(0,Z))
2075 return V_0280A0_SWAP_STD_REV; /* ZYX */
2076 break;
2077 case 4:
2078 /* check the middle channels; the 1st and 4th channels can be NONE */
2079 if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z)) {
2080 return V_0280A0_SWAP_STD; /* XYZW */
2081 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y)) {
2082 return V_0280A0_SWAP_STD_REV; /* WZYX */
2083 } else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X)) {
2084 return V_0280A0_SWAP_ALT; /* ZYXW */
2085 } else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W)) {
2086 /* YZWX */
2087 if (desc->is_array)
2088 return V_0280A0_SWAP_ALT_REV;
2089 else
2090 return (do_endian_swap ? V_0280A0_SWAP_ALT : V_0280A0_SWAP_ALT_REV);
2091 }
2092 break;
2093 }
2094 return ~0U;
2095 }
2096
2097 /* FAST COLOR CLEAR */
2098
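/* Pack the API clear color into the two 32-bit clear words stored in
 * rtex->color_clear_value for use by the CMASK fast clear path. */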
2099 static void evergreen_set_clear_color(struct r600_texture *rtex,
2100 enum pipe_format surface_format,
2101 const union pipe_color_union *color)
2102 {
2103 union util_color uc;
2104
2105 memset(&uc, 0, sizeof(uc));
2106
2107 if (rtex->surface.bpe == 16) {
2108 /* DCC fast clear only:
2109 * CLEAR_WORD0 = R = G = B
2110 * CLEAR_WORD1 = A
2111 */
2112 assert(color->ui[0] == color->ui[1] &&
2113 color->ui[0] == color->ui[2]);
2114 uc.ui[0] = color->ui[0];
2115 uc.ui[1] = color->ui[3];
2116 } else if (util_format_is_pure_uint(surface_format)) {
2117 util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
2118 } else if (util_format_is_pure_sint(surface_format)) {
2119 util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
2120 } else {
2121 util_pack_color(color->f, surface_format, &uc);
2122 }
2123
2124 memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
2125 }
2126
2127 /* Set the same micro tile mode as the destination of the last MSAA resolve.
2128 * This allows hitting the MSAA resolve fast path, which requires that both
2129 * src and dst micro tile modes match.
2130 */
2131 static void si_set_optimal_micro_tile_mode(struct r600_common_screen *rscreen,
2132 struct r600_texture *rtex)
2133 {
2134 if (rtex->resource.b.is_shared ||
2135 rtex->resource.b.b.nr_samples <= 1 ||
2136 rtex->surface.micro_tile_mode == rtex->last_msaa_resolve_target_micro_mode)
2137 return;
2138
2139 assert(rscreen->chip_class >= GFX9 ||
2140 rtex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
2141 assert(rtex->resource.b.b.last_level == 0);
2142
2143 if (rscreen->chip_class >= GFX9) {
2144 /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
2145 assert(rtex->surface.u.gfx9.surf.swizzle_mode >= 4);
2146
2147 /* If you do swizzle_mode % 4, you'll get:
2148 * 0 = Depth
2149 * 1 = Standard
2150 * 2 = Displayable
2151 * 3 = Rotated
2152 *
2153 * Depth-sample order isn't allowed:
2154 */
2155 assert(rtex->surface.u.gfx9.surf.swizzle_mode % 4 != 0);
2156
2157 switch (rtex->last_msaa_resolve_target_micro_mode) {
2158 case RADEON_MICRO_MODE_DISPLAY:
2159 rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
2160 rtex->surface.u.gfx9.surf.swizzle_mode += 2; /* D */
2161 break;
2162 case RADEON_MICRO_MODE_THIN:
2163 rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
2164 rtex->surface.u.gfx9.surf.swizzle_mode += 1; /* S */
2165 break;
2166 case RADEON_MICRO_MODE_ROTATED:
2167 rtex->surface.u.gfx9.surf.swizzle_mode &= ~0x3;
2168 rtex->surface.u.gfx9.surf.swizzle_mode += 3; /* R */
2169 break;
2170 default: /* depth */
2171 assert(!"unexpected micro mode");
2172 return;
2173 }
2174 } else if (rscreen->chip_class >= CIK) {
2175 /* These magic numbers were copied from addrlib. It doesn't use
2176 * any definitions for them either. They are all 2D_TILED_THIN1
2177 * modes with different bpp and micro tile mode.
2178 */
2179 switch (rtex->last_msaa_resolve_target_micro_mode) {
2180 case RADEON_MICRO_MODE_DISPLAY:
2181 rtex->surface.u.legacy.tiling_index[0] = 10;
2182 break;
2183 case RADEON_MICRO_MODE_THIN:
2184 rtex->surface.u.legacy.tiling_index[0] = 14;
2185 break;
2186 case RADEON_MICRO_MODE_ROTATED:
2187 rtex->surface.u.legacy.tiling_index[0] = 28;
2188 break;
2189 default: /* depth, thick */
2190 assert(!"unexpected micro mode");
2191 return;
2192 }
2193 } else { /* SI */
2194 switch (rtex->last_msaa_resolve_target_micro_mode) {
2195 case RADEON_MICRO_MODE_DISPLAY:
2196 switch (rtex->surface.bpe) {
2197 case 1:
2198 rtex->surface.u.legacy.tiling_index[0] = 10;
2199 break;
2200 case 2:
2201 rtex->surface.u.legacy.tiling_index[0] = 11;
2202 break;
2203 default: /* 4, 8 */
2204 rtex->surface.u.legacy.tiling_index[0] = 12;
2205 break;
2206 }
2207 break;
2208 case RADEON_MICRO_MODE_THIN:
2209 switch (rtex->surface.bpe) {
2210 case 1:
2211 rtex->surface.u.legacy.tiling_index[0] = 14;
2212 break;
2213 case 2:
2214 rtex->surface.u.legacy.tiling_index[0] = 15;
2215 break;
2216 case 4:
2217 rtex->surface.u.legacy.tiling_index[0] = 16;
2218 break;
2219 default: /* 8, 16 */
2220 rtex->surface.u.legacy.tiling_index[0] = 17;
2221 break;
2222 }
2223 break;
2224 default: /* depth, thick */
2225 assert(!"unexpected micro mode");
2226 return;
2227 }
2228 }
2229
2230 rtex->surface.micro_tile_mode = rtex->last_msaa_resolve_target_micro_mode;
2231
2232 p_atomic_inc(&rscreen->dirty_tex_counter);
2233 }
2234
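/* Try to replace eligible color-buffer clears with a fast clear: zero the
 * CMASK metadata and record the clear color instead of writing every pixel.
 * Color buffers that were fast-cleared are removed from *buffers so the
 * caller skips the regular clear for them. */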
2235 void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
2236 struct pipe_framebuffer_state *fb,
2237 struct r600_atom *fb_state,
2238 unsigned *buffers, ubyte *dirty_cbufs,
2239 const union pipe_color_union *color)
2240 {
2241 int i;
2242
2243 /* This function is broken on big-endian hosts, so just disable this path for now. */
2244 #ifdef PIPE_ARCH_BIG_ENDIAN
2245 return;
2246 #endif
2247
2248 if (rctx->render_cond)
2249 return;
2250
2251 for (i = 0; i < fb->nr_cbufs; i++) {
2252 struct r600_texture *tex;
2253 unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;
2254
2255 if (!fb->cbufs[i])
2256 continue;
2257
2258 /* if this colorbuffer is not being cleared */
2259 if (!(*buffers & clear_bit))
2260 continue;
2261
2262 tex = (struct r600_texture *)fb->cbufs[i]->texture;
2263
2264 /* the clear is allowed if all layers are bound */
2265 if (fb->cbufs[i]->u.tex.first_layer != 0 ||
2266 fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
2267 continue;
2268 }
2269
2270 /* cannot clear mipmapped textures */
2271 if (fb->cbufs[i]->texture->last_level != 0) {
2272 continue;
2273 }
2274
2275 /* only supported on tiled surfaces */
2276 if (tex->surface.is_linear) {
2277 continue;
2278 }
2279
2280 /* shared textures can't use fast clear without an explicit flush,
2281 * because there is no way to communicate the clear color among
2282 * all clients
2283 */
2284 if (tex->resource.b.is_shared &&
2285 !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
2286 continue;
2287
2288 /* fast color clear with 1D tiling doesn't work on old kernels and CIK */
2289 if (rctx->chip_class == CIK &&
2290 tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_1D &&
2291 rctx->screen->info.drm_major == 2 &&
2292 rctx->screen->info.drm_minor < 38) {
2293 continue;
2294 }
2295
2296 {
2297 /* 128-bit formats are unsupported */
2298 if (tex->surface.bpe > 8) {
2299 continue;
2300 }
2301
2302 /* RB+ doesn't work with CMASK fast clear on Stoney. */
2303 if (rctx->family == CHIP_STONEY)
2304 continue;
2305
2306 /* ensure CMASK is enabled */
2307 r600_texture_alloc_cmask_separate(rctx->screen, tex);
2308 if (tex->cmask.size == 0) {
2309 continue;
2310 }
2311
2312 /* Do the fast clear. */
2313 rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
2314 tex->cmask.offset, tex->cmask.size, 0,
2315 R600_COHERENCY_CB_META);
2316
2317 bool need_compressed_update = !tex->dirty_level_mask;
2318
2319 tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
2320
2321 if (need_compressed_update)
2322 p_atomic_inc(&rctx->screen->compressed_colortex_counter);
2323 }
2324
2325 /* We can change the micro tile mode before a full clear. */
2326 if (rctx->screen->chip_class >= SI)
2327 si_set_optimal_micro_tile_mode(rctx->screen, tex);
2328
2329 evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
2330
2331 if (dirty_cbufs)
2332 *dirty_cbufs |= 1 << i;
2333 rctx->set_atom_dirty(rctx, fb_state, true);
2334 *buffers &= ~clear_bit;
2335 }
2336 }
2337
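/* Import an external buffer handle as a pipe_memory_object, recording the
 * buffer together with the stride/offset metadata reported by the winsys. */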
2338 static struct pipe_memory_object *
2339 r600_memobj_from_handle(struct pipe_screen *screen,
2340 struct winsys_handle *whandle,
2341 bool dedicated)
2342 {
2343 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
2344 struct r600_memory_object *memobj = CALLOC_STRUCT(r600_memory_object);
2345 struct pb_buffer *buf = NULL;
2346 uint32_t stride, offset;
2347
2348 if (!memobj)
2349 return NULL;
2350
2351 buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle,
2352 &stride, &offset);
2353 if (!buf) {
2354 free(memobj);
2355 return NULL;
2356 }
2357
2358 memobj->b.dedicated = dedicated;
2359 memobj->buf = buf;
2360 memobj->stride = stride;
2361 memobj->offset = offset;
2362
2363 return (struct pipe_memory_object *)memobj;
2364
2365 }
2366
2367 static void
2368 r600_memobj_destroy(struct pipe_screen *screen,
2369 struct pipe_memory_object *_memobj)
2370 {
2371 struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
2372
2373 pb_reference(&memobj->buf, NULL);
2374 free(memobj);
2375 }
2376
2377 static struct pipe_resource *
2378 r600_texture_from_memobj(struct pipe_screen *screen,
2379 const struct pipe_resource *templ,
2380 struct pipe_memory_object *_memobj,
2381 uint64_t offset)
2382 {
2383 int r;
2384 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
2385 struct r600_memory_object *memobj = (struct r600_memory_object *)_memobj;
2386 struct r600_texture *rtex;
2387 struct radeon_surf surface = {};
2388 struct radeon_bo_metadata metadata = {};
2389 enum radeon_surf_mode array_mode;
2390 bool is_scanout;
2391 struct pb_buffer *buf = NULL;
2392
2393 if (memobj->b.dedicated) {
2394 rscreen->ws->buffer_get_metadata(memobj->buf, &metadata);
2395 r600_surface_import_metadata(rscreen, &surface, &metadata,
2396 &array_mode, &is_scanout);
2397 } else {
2398 /**
2399 * The bo metadata is unset for non-dedicated images, so we fall
2400 * back to linear. See the answer to question 5 of the
2401 * VK_KHX_external_memory spec for some details.
2402 *
2403 * It is possible that this case isn't going to work if the
2404 * surface pitch isn't correctly aligned by default.
2405 *
2406 * In order to support it correctly we require multi-image
2407 * metadata to be synchronized between radv and radeonsi. The
2408 * semantics of associating multiple image metadata to a memory
2409 * object on the vulkan export side are not concretely defined
2410 * either.
2411 *
2412 * All the use cases we are aware of at the moment for memory
2413 * objects use dedicated allocations. So let's keep the initial
2414 * implementation simple.
2415 *
2416 * A possible alternative is to attempt to reconstruct the
2417 * tiling information when the TexParameter TEXTURE_TILING_EXT
2418 * is set.
2419 */
2420 array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
2421 is_scanout = false;
2422
2423 }
2424
2425 r = r600_init_surface(rscreen, &surface, templ,
2426 array_mode, memobj->stride,
2427 offset, true, is_scanout,
2428 false, false);
2429 if (r)
2430 return NULL;
2431
2432 rtex = r600_texture_create_object(screen, templ, memobj->buf, &surface);
2433 if (!rtex)
2434 return NULL;
2435
2436 /* r600_texture_create_object doesn't increment refcount of
2437 * memobj->buf, so increment it here.
2438 */
2439 pb_reference(&buf, memobj->buf);
2440
2441 rtex->resource.b.is_shared = true;
2442 rtex->resource.external_usage = PIPE_HANDLE_USAGE_READ_WRITE;
2443
2444 if (rscreen->apply_opaque_metadata)
2445 rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
2446
2447 return &rtex->resource.b.b;
2448 }
2449
2450 void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
2451 {
2452 rscreen->b.resource_from_handle = r600_texture_from_handle;
2453 rscreen->b.resource_get_handle = r600_texture_get_handle;
2454 rscreen->b.resource_from_memobj = r600_texture_from_memobj;
2455 rscreen->b.memobj_create_from_handle = r600_memobj_from_handle;
2456 rscreen->b.memobj_destroy = r600_memobj_destroy;
2457 }
2458
2459 void r600_init_context_texture_functions(struct r600_common_context *rctx)
2460 {
2461 rctx->b.create_surface = r600_create_surface;
2462 rctx->b.surface_destroy = r600_surface_destroy;
2463 rctx->b.clear_texture = r600_clear_texture;
2464 }