gallium/radeon: notify all contexts when cmasks are enabled/disabled
[mesa.git] / src/gallium/drivers/radeon/r600_texture.c
/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 *      Corbin Simpson
 */
#include "r600_pipe_common.h"
#include "r600_cs.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include <errno.h>
#include <inttypes.h>

/* Same as resource_copy_region, except that both upsampling and downsampling are allowed. */
static void r600_copy_region_with_blit(struct pipe_context *pipe,
                                       struct pipe_resource *dst,
                                       unsigned dst_level,
                                       unsigned dstx, unsigned dsty, unsigned dstz,
                                       struct pipe_resource *src,
                                       unsigned src_level,
                                       const struct pipe_box *src_box)
{
        struct pipe_blit_info blit;

        memset(&blit, 0, sizeof(blit));
        blit.src.resource = src;
        blit.src.format = src->format;
        blit.src.level = src_level;
        blit.src.box = *src_box;
        blit.dst.resource = dst;
        blit.dst.format = dst->format;
        blit.dst.level = dst_level;
        blit.dst.box.x = dstx;
        blit.dst.box.y = dsty;
        blit.dst.box.z = dstz;
        blit.dst.box.width = src_box->width;
        blit.dst.box.height = src_box->height;
        blit.dst.box.depth = src_box->depth;
        blit.mask = util_format_get_mask(src->format) &
                    util_format_get_mask(dst->format);
        blit.filter = PIPE_TEX_FILTER_NEAREST;

        if (blit.mask) {
                pipe->blit(pipe, &blit);
        }
}

/* Copy from a full GPU texture to a transfer's staging one. */
static void r600_copy_to_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
        struct pipe_resource *dst = &rtransfer->staging->b.b;
        struct pipe_resource *src = transfer->resource;

        if (src->nr_samples > 1) {
                r600_copy_region_with_blit(ctx, dst, 0, 0, 0, 0,
                                           src, transfer->level, &transfer->box);
                return;
        }

        rctx->dma_copy(ctx, dst, 0, 0, 0, 0, src, transfer->level,
                       &transfer->box);
}

/* Copy from a transfer's staging texture to a full GPU one. */
static void r600_copy_from_staging_texture(struct pipe_context *ctx, struct r600_transfer *rtransfer)
{
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct pipe_transfer *transfer = (struct pipe_transfer*)rtransfer;
        struct pipe_resource *dst = transfer->resource;
        struct pipe_resource *src = &rtransfer->staging->b.b;
        struct pipe_box sbox;

        u_box_3d(0, 0, 0, transfer->box.width, transfer->box.height, transfer->box.depth, &sbox);

        if (dst->nr_samples > 1) {
                r600_copy_region_with_blit(ctx, dst, transfer->level,
                                           transfer->box.x, transfer->box.y, transfer->box.z,
                                           src, 0, &sbox);
                return;
        }

        rctx->dma_copy(ctx, dst, transfer->level,
                       transfer->box.x, transfer->box.y, transfer->box.z,
                       src, 0, &sbox);
}

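/* Return the byte offset of a (box, level) location within the texture,
 * taking the format's block dimensions into account. */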
static unsigned r600_texture_get_offset(struct r600_texture *rtex, unsigned level,
                                        const struct pipe_box *box)
{
        enum pipe_format format = rtex->resource.b.b.format;

        return rtex->surface.level[level].offset +
               box->z * rtex->surface.level[level].slice_size +
               box->y / util_format_get_blockheight(format) * rtex->surface.level[level].pitch_bytes +
               box->x / util_format_get_blockwidth(format) * util_format_get_blocksize(format);
}

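/* Translate a pipe_resource template into the radeon_surf description
 * consumed by the winsys surface allocator. */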
static int r600_init_surface(struct r600_common_screen *rscreen,
                             struct radeon_surf *surface,
                             const struct pipe_resource *ptex,
                             unsigned array_mode,
                             bool is_flushed_depth)
{
        const struct util_format_description *desc =
                util_format_description(ptex->format);
        bool is_depth, is_stencil;

        is_depth = util_format_has_depth(desc);
        is_stencil = util_format_has_stencil(desc);

        surface->npix_x = ptex->width0;
        surface->npix_y = ptex->height0;
        surface->npix_z = ptex->depth0;
        surface->blk_w = util_format_get_blockwidth(ptex->format);
        surface->blk_h = util_format_get_blockheight(ptex->format);
        surface->blk_d = 1;
        surface->array_size = 1;
        surface->last_level = ptex->last_level;

        if (rscreen->chip_class >= EVERGREEN && !is_flushed_depth &&
            ptex->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) {
                surface->bpe = 4; /* stencil is allocated separately on evergreen */
        } else {
                surface->bpe = util_format_get_blocksize(ptex->format);
                /* align byte per element on dword */
                if (surface->bpe == 3) {
                        surface->bpe = 4;
                }
        }

        surface->nsamples = ptex->nr_samples ? ptex->nr_samples : 1;
        surface->flags = RADEON_SURF_SET(array_mode, MODE);

        switch (ptex->target) {
        case PIPE_TEXTURE_1D:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
                break;
        case PIPE_TEXTURE_RECT:
        case PIPE_TEXTURE_2D:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
                break;
        case PIPE_TEXTURE_3D:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
                break;
        case PIPE_TEXTURE_1D_ARRAY:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
                surface->array_size = ptex->array_size;
                break;
        case PIPE_TEXTURE_2D_ARRAY:
        case PIPE_TEXTURE_CUBE_ARRAY: /* cube array layout like 2d array */
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
                surface->array_size = ptex->array_size;
                break;
        case PIPE_TEXTURE_CUBE:
                surface->flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_CUBEMAP, TYPE);
                break;
        case PIPE_BUFFER:
        default:
                return -EINVAL;
        }
        if (ptex->bind & PIPE_BIND_SCANOUT) {
                surface->flags |= RADEON_SURF_SCANOUT;
        }

        if (!is_flushed_depth && is_depth) {
                surface->flags |= RADEON_SURF_ZBUFFER;

                if (is_stencil) {
                        surface->flags |= RADEON_SURF_SBUFFER |
                                          RADEON_SURF_HAS_SBUFFER_MIPTREE;
                }
        }
        if (rscreen->chip_class >= SI) {
                surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
        }
        return 0;
}

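/* Finalize the surface layout through the winsys and apply the pitch
 * override used for buffers imported from older DDX drivers. */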
static int r600_setup_surface(struct pipe_screen *screen,
                              struct r600_texture *rtex,
                              unsigned pitch_in_bytes_override)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        int r;

        r = rscreen->ws->surface_init(rscreen->ws, &rtex->surface);
        if (r) {
                return r;
        }

        rtex->size = rtex->surface.bo_size;

        if (pitch_in_bytes_override && pitch_in_bytes_override != rtex->surface.level[0].pitch_bytes) {
                /* The old DDX on Evergreen overestimates the alignment for
                 * 1D tiling; such surfaces have only one level.
                 */
                rtex->surface.level[0].nblk_x = pitch_in_bytes_override / rtex->surface.bpe;
                rtex->surface.level[0].pitch_bytes = pitch_in_bytes_override;
                rtex->surface.level[0].slice_size = pitch_in_bytes_override * rtex->surface.level[0].nblk_y;
                if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
                        rtex->surface.stencil_offset =
                        rtex->surface.stencil_level[0].offset = rtex->surface.level[0].slice_size;
                }
        }
        return 0;
}

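/* Record the tiling layout in the BO metadata so that other processes
 * importing this buffer interpret its contents with the same layout. */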
static void r600_texture_init_metadata(struct r600_texture *rtex,
                                       struct radeon_bo_metadata *metadata)
{
        struct radeon_surf *surface = &rtex->surface;

        memset(metadata, 0, sizeof(*metadata));
        metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ?
                              RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
        metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ?
                              RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
        metadata->pipe_config = surface->pipe_config;
        metadata->bankw = surface->bankw;
        metadata->bankh = surface->bankh;
        metadata->tile_split = surface->tile_split;
        metadata->stencil_tile_split = surface->stencil_tile_split;
        metadata->mtilea = surface->mtilea;
        metadata->num_banks = surface->num_banks;
        metadata->stride = surface->level[0].pitch_bytes;
        metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
}

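/* Bump the screen-wide framebuffer counter. Contexts compare this counter
 * against their own copy and re-validate their framebuffer state when the
 * two differ, so incrementing it effectively notifies all contexts of a
 * per-screen change such as a CMASK being enabled or disabled. */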
static void r600_dirty_all_framebuffer_states(struct r600_common_screen *rscreen)
{
        p_atomic_inc(&rscreen->dirty_fb_counter);
}

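/* Flush the resource on the aux context, which expands fast-cleared
 * (CMASK/DCC-compressed) pixels back to their real values. */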
static void r600_eliminate_fast_color_clear(struct r600_common_screen *rscreen,
                                            struct r600_texture *rtex)
{
        struct pipe_context *ctx = rscreen->aux_context;

        pipe_mutex_lock(rscreen->aux_context_lock);
        ctx->flush_resource(ctx, &rtex->resource.b.b);
        ctx->flush(ctx, NULL, 0);
        pipe_mutex_unlock(rscreen->aux_context_lock);
}

static void r600_texture_disable_cmask(struct r600_common_screen *rscreen,
                                       struct r600_texture *rtex)
{
        if (!rtex->cmask.size)
                return;

        assert(rtex->resource.b.b.nr_samples <= 1);

        /* Disable CMASK. */
        memset(&rtex->cmask, 0, sizeof(rtex->cmask));
        rtex->cmask.base_address_reg = rtex->resource.gpu_address >> 8;

        if (rscreen->chip_class >= SI)
                rtex->cb_color_info &= ~SI_S_028C70_FAST_CLEAR(1);
        else
                rtex->cb_color_info &= ~EG_S_028C70_FAST_CLEAR(1);

        if (rtex->cmask_buffer != &rtex->resource)
                pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);

        /* Notify all contexts about the change. */
        r600_dirty_all_framebuffer_states(rscreen);
        p_atomic_inc(&rscreen->compressed_colortex_counter);
}

static void r600_texture_disable_dcc(struct r600_common_screen *rscreen,
                                     struct r600_texture *rtex)
{
        struct r600_common_context *rctx =
                (struct r600_common_context *)rscreen->aux_context;

        if (!rtex->dcc_offset)
                return;

        /* Decompress DCC. */
        pipe_mutex_lock(rscreen->aux_context_lock);
        rctx->decompress_dcc(&rctx->b, rtex);
        rctx->b.flush(&rctx->b, NULL, 0);
        pipe_mutex_unlock(rscreen->aux_context_lock);

        /* Disable DCC. */
        rtex->dcc_offset = 0;
        rtex->cb_color_info &= ~VI_S_028C70_DCC_ENABLE(1);

        /* Notify all contexts about the change. */
        r600_dirty_all_framebuffer_states(rscreen);

        /* TODO: re-set all sampler views and images, but how? */
}

static boolean r600_texture_get_handle(struct pipe_screen* screen,
                                       struct pipe_resource *resource,
                                       struct winsys_handle *whandle,
                                       unsigned usage)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct r600_resource *res = (struct r600_resource*)resource;
        struct r600_texture *rtex = (struct r600_texture*)resource;
        struct radeon_bo_metadata metadata;

        /* This is not supported now, but it might be required for OpenCL
         * interop in the future.
         */
        if (resource->target != PIPE_BUFFER &&
            (resource->nr_samples > 1 || rtex->is_depth))
                return FALSE;

        if (!res->is_shared) {
                res->is_shared = true;
                res->external_usage = usage;

                if (resource->target != PIPE_BUFFER) {
                        /* Since shader image stores don't support DCC on VI,
                         * disable it for external clients that want write
                         * access.
                         */
                        if (usage & PIPE_HANDLE_USAGE_WRITE)
                                r600_texture_disable_dcc(rscreen, rtex);

                        if (!(usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) {
                                /* Eliminate fast clear (both CMASK and DCC) */
                                r600_eliminate_fast_color_clear(rscreen, rtex);

                                /* Disable CMASK if flush_resource isn't going
                                 * to be called.
                                 */
                                r600_texture_disable_cmask(rscreen, rtex);
                        }

                        /* Set metadata. */
                        r600_texture_init_metadata(rtex, &metadata);
                        if (rscreen->query_opaque_metadata)
                                rscreen->query_opaque_metadata(rscreen, rtex,
                                                               &metadata);

                        rscreen->ws->buffer_set_metadata(res->buf, &metadata);
                }
        } else {
                assert(res->external_usage == usage);
        }

        return rscreen->ws->buffer_get_handle(res->buf,
                                              rtex->surface.level[0].pitch_bytes,
                                              whandle);
}

static void r600_texture_destroy(struct pipe_screen *screen,
                                 struct pipe_resource *ptex)
{
        struct r600_texture *rtex = (struct r600_texture*)ptex;
        struct r600_resource *resource = &rtex->resource;

        if (rtex->flushed_depth_texture)
                pipe_resource_reference((struct pipe_resource **)&rtex->flushed_depth_texture, NULL);

        pipe_resource_reference((struct pipe_resource**)&rtex->htile_buffer, NULL);
        if (rtex->cmask_buffer != &rtex->resource) {
                pipe_resource_reference((struct pipe_resource**)&rtex->cmask_buffer, NULL);
        }
        pb_reference(&resource->buf, NULL);
        FREE(rtex);
}

static const struct u_resource_vtbl r600_texture_vtbl;

/* The number of samples can be specified independently of the texture. */
void r600_texture_get_fmask_info(struct r600_common_screen *rscreen,
                                 struct r600_texture *rtex,
                                 unsigned nr_samples,
                                 struct r600_fmask_info *out)
{
        /* FMASK is allocated like an ordinary texture. */
        struct radeon_surf fmask = rtex->surface;

        memset(out, 0, sizeof(*out));

        fmask.bo_alignment = 0;
        fmask.bo_size = 0;
        fmask.nsamples = 1;
        fmask.flags |= RADEON_SURF_FMASK;

        /* Force 2D tiling if it wasn't set. This may occur when creating
         * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
         * destination buffer must have an FMASK too. */
        fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
        fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);

        if (rscreen->chip_class >= SI) {
                fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
        }

        switch (nr_samples) {
        case 2:
        case 4:
                fmask.bpe = 1;
                if (rscreen->chip_class <= CAYMAN) {
                        fmask.bankh = 4;
                }
                break;
        case 8:
                fmask.bpe = 4;
                break;
        default:
                R600_ERR("Invalid sample count for FMASK allocation.\n");
                return;
        }

        /* Overallocate FMASK on R600-R700 to fix colorbuffer corruption.
         * This can be fixed by writing a separate FMASK allocator specifically
         * for R600-R700 asics. */
        if (rscreen->chip_class <= R700) {
                fmask.bpe *= 2;
        }

        if (rscreen->ws->surface_init(rscreen->ws, &fmask)) {
                R600_ERR("Got error in surface_init while allocating FMASK.\n");
                return;
        }

        assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);

        out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
        if (out->slice_tile_max)
                out->slice_tile_max -= 1;

        out->tile_mode_index = fmask.tiling_index[0];
        out->pitch_in_pixels = fmask.level[0].nblk_x;
        out->bank_height = fmask.bankh;
        out->alignment = MAX2(256, fmask.bo_alignment);
        out->size = fmask.bo_size;
}

static void r600_texture_allocate_fmask(struct r600_common_screen *rscreen,
                                        struct r600_texture *rtex)
{
        r600_texture_get_fmask_info(rscreen, rtex,
                                    rtex->resource.b.b.nr_samples, &rtex->fmask);

        rtex->fmask.offset = align(rtex->size, rtex->fmask.alignment);
        rtex->size = rtex->fmask.offset + rtex->fmask.size;
}

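/* Compute the CMASK layout for R600-Cayman. Each 8x8-pixel tile is
 * tracked by a 4-bit CMASK element, and elements are grouped into
 * macrotiles sized to fill the CMASK cache across all pipes. */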
void r600_texture_get_cmask_info(struct r600_common_screen *rscreen,
                                 struct r600_texture *rtex,
                                 struct r600_cmask_info *out)
{
        unsigned cmask_tile_width = 8;
        unsigned cmask_tile_height = 8;
        unsigned cmask_tile_elements = cmask_tile_width * cmask_tile_height;
        unsigned element_bits = 4;
        unsigned cmask_cache_bits = 1024;
        unsigned num_pipes = rscreen->info.num_tile_pipes;
        unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;

        unsigned elements_per_macro_tile = (cmask_cache_bits / element_bits) * num_pipes;
        unsigned pixels_per_macro_tile = elements_per_macro_tile * cmask_tile_elements;
        unsigned sqrt_pixels_per_macro_tile = sqrt(pixels_per_macro_tile);
        unsigned macro_tile_width = util_next_power_of_two(sqrt_pixels_per_macro_tile);
        unsigned macro_tile_height = pixels_per_macro_tile / macro_tile_width;

        unsigned pitch_elements = align(rtex->surface.npix_x, macro_tile_width);
        unsigned height = align(rtex->surface.npix_y, macro_tile_height);

        unsigned base_align = num_pipes * pipe_interleave_bytes;
        unsigned slice_bytes =
                ((pitch_elements * height * element_bits + 7) / 8) / cmask_tile_elements;

        assert(macro_tile_width % 128 == 0);
        assert(macro_tile_height % 128 == 0);

        out->pitch = pitch_elements;
        out->height = height;
        out->xalign = macro_tile_width;
        out->yalign = macro_tile_height;
        out->slice_tile_max = ((pitch_elements * height) / (128*128)) - 1;
        out->alignment = MAX2(256, base_align);
        out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
                    align(slice_bytes, base_align);
}

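/* SI+ variant: the CMASK cache-line dimensions are fixed per pipe count
 * rather than derived from the cache size. */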
static void si_texture_get_cmask_info(struct r600_common_screen *rscreen,
                                      struct r600_texture *rtex,
                                      struct r600_cmask_info *out)
{
        unsigned pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
        unsigned num_pipes = rscreen->info.num_tile_pipes;
        unsigned cl_width, cl_height;

        switch (num_pipes) {
        case 2:
                cl_width = 32;
                cl_height = 16;
                break;
        case 4:
                cl_width = 32;
                cl_height = 32;
                break;
        case 8:
                cl_width = 64;
                cl_height = 32;
                break;
        case 16: /* Hawaii */
                cl_width = 64;
                cl_height = 64;
                break;
        default:
                assert(0);
                return;
        }

        unsigned base_align = num_pipes * pipe_interleave_bytes;

        unsigned width = align(rtex->surface.npix_x, cl_width*8);
        unsigned height = align(rtex->surface.npix_y, cl_height*8);
        unsigned slice_elements = (width * height) / (8*8);

        /* Each element of CMASK is a nibble. */
        unsigned slice_bytes = slice_elements / 2;

        out->pitch = width;
        out->height = height;
        out->xalign = cl_width * 8;
        out->yalign = cl_height * 8;
        out->slice_tile_max = (width * height) / (128*128);
        if (out->slice_tile_max)
                out->slice_tile_max -= 1;

        out->alignment = MAX2(256, base_align);
        out->size = (util_max_layer(&rtex->resource.b.b, 0) + 1) *
                    align(slice_bytes, base_align);
}

static void r600_texture_allocate_cmask(struct r600_common_screen *rscreen,
                                        struct r600_texture *rtex)
{
        if (rscreen->chip_class >= SI) {
                si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
        } else {
                r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
        }

        rtex->cmask.offset = align(rtex->size, rtex->cmask.alignment);
        rtex->size = rtex->cmask.offset + rtex->cmask.size;

        if (rscreen->chip_class >= SI)
                rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
        else
                rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);
}

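/* Allocate a CMASK in a standalone buffer for a texture that was created
 * without one, so that fast color clear can be enabled after the fact. */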
static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen,
                                              struct r600_texture *rtex)
{
        if (rtex->cmask_buffer)
                return;

        assert(rtex->cmask.size == 0);

        if (rscreen->chip_class >= SI) {
                si_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
        } else {
                r600_texture_get_cmask_info(rscreen, rtex, &rtex->cmask);
        }

        rtex->cmask_buffer = (struct r600_resource *)
                pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
                                   PIPE_USAGE_DEFAULT, rtex->cmask.size);
        if (rtex->cmask_buffer == NULL) {
                rtex->cmask.size = 0;
                return;
        }

        /* update colorbuffer state bits */
        rtex->cmask.base_address_reg = rtex->cmask_buffer->gpu_address >> 8;

        if (rscreen->chip_class >= SI)
                rtex->cb_color_info |= SI_S_028C70_FAST_CLEAR(1);
        else
                rtex->cb_color_info |= EG_S_028C70_FAST_CLEAR(1);

        p_atomic_inc(&rscreen->compressed_colortex_counter);
}

static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
                                            struct r600_texture *rtex)
{
        unsigned cl_width, cl_height, width, height;
        unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
        unsigned num_pipes = rscreen->info.num_tile_pipes;

        if (rscreen->chip_class <= EVERGREEN &&
            rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 26)
                return 0;

        /* HW bug on R6xx. */
        if (rscreen->chip_class == R600 &&
            (rtex->surface.level[0].npix_x > 7680 ||
             rtex->surface.level[0].npix_y > 7680))
                return 0;

        /* HTILE is broken with 1D tiling on old kernels and CIK. */
        if (rscreen->chip_class >= CIK &&
            rtex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
            rscreen->info.drm_major == 2 && rscreen->info.drm_minor < 38)
                return 0;

        /* Overalign HTILE on Stoney to fix piglit/depthstencil-render-miplevels 585. */
        if (rscreen->family == CHIP_STONEY)
                num_pipes = 4;

        switch (num_pipes) {
        case 1:
                cl_width = 32;
                cl_height = 16;
                break;
        case 2:
                cl_width = 32;
                cl_height = 32;
                break;
        case 4:
                cl_width = 64;
                cl_height = 32;
                break;
        case 8:
                cl_width = 64;
                cl_height = 64;
                break;
        case 16:
                cl_width = 128;
                cl_height = 64;
                break;
        default:
                assert(0);
                return 0;
        }

        width = align(rtex->surface.npix_x, cl_width * 8);
        height = align(rtex->surface.npix_y, cl_height * 8);

        slice_elements = (width * height) / (8 * 8);
        slice_bytes = slice_elements * 4;

        pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
        base_align = num_pipes * pipe_interleave_bytes;

        rtex->htile.pitch = width;
        rtex->htile.height = height;
        rtex->htile.xalign = cl_width * 8;
        rtex->htile.yalign = cl_height * 8;

        return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
               align(slice_bytes, base_align);
}

static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
                                        struct r600_texture *rtex)
{
        unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);

        if (!htile_size)
                return;

        rtex->htile_buffer = (struct r600_resource*)
                pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
                                   PIPE_USAGE_DEFAULT, htile_size);
        if (rtex->htile_buffer == NULL) {
                /* This is not a fatal error as we can still keep rendering
                 * without the HTILE buffer. */
                R600_ERR("Failed to create buffer object for htile buffer.\n");
        } else {
                r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
                                         htile_size, 0, true);
        }
}

void r600_print_texture_info(struct r600_texture *rtex, FILE *f)
{
        int i;

        fprintf(f, " Info: npix_x=%u, npix_y=%u, npix_z=%u, blk_w=%u, "
                "blk_h=%u, blk_d=%u, array_size=%u, last_level=%u, "
                "bpe=%u, nsamples=%u, flags=0x%x, %s\n",
                rtex->surface.npix_x, rtex->surface.npix_y,
                rtex->surface.npix_z, rtex->surface.blk_w,
                rtex->surface.blk_h, rtex->surface.blk_d,
                rtex->surface.array_size, rtex->surface.last_level,
                rtex->surface.bpe, rtex->surface.nsamples,
                rtex->surface.flags, util_format_short_name(rtex->resource.b.b.format));

        fprintf(f, " Layout: size=%"PRIu64", alignment=%"PRIu64", bankw=%u, "
                "bankh=%u, nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
                rtex->surface.bo_size, rtex->surface.bo_alignment, rtex->surface.bankw,
                rtex->surface.bankh, rtex->surface.num_banks, rtex->surface.mtilea,
                rtex->surface.tile_split, rtex->surface.pipe_config,
                (rtex->surface.flags & RADEON_SURF_SCANOUT) != 0);

        if (rtex->fmask.size)
                fprintf(f, " FMask: offset=%u, size=%u, alignment=%u, pitch_in_pixels=%u, "
                        "bankh=%u, slice_tile_max=%u, tile_mode_index=%u\n",
                        rtex->fmask.offset, rtex->fmask.size, rtex->fmask.alignment,
                        rtex->fmask.pitch_in_pixels, rtex->fmask.bank_height,
                        rtex->fmask.slice_tile_max, rtex->fmask.tile_mode_index);

        if (rtex->cmask.size)
                fprintf(f, " CMask: offset=%u, size=%u, alignment=%u, pitch=%u, "
                        "height=%u, xalign=%u, yalign=%u, slice_tile_max=%u\n",
                        rtex->cmask.offset, rtex->cmask.size, rtex->cmask.alignment,
                        rtex->cmask.pitch, rtex->cmask.height, rtex->cmask.xalign,
                        rtex->cmask.yalign, rtex->cmask.slice_tile_max);

        if (rtex->htile_buffer)
                fprintf(f, " HTile: size=%u, alignment=%u, pitch=%u, height=%u, "
                        "xalign=%u, yalign=%u\n",
                        rtex->htile_buffer->b.b.width0,
                        rtex->htile_buffer->buf->alignment, rtex->htile.pitch,
                        rtex->htile.height, rtex->htile.xalign, rtex->htile.yalign);

        if (rtex->dcc_offset) {
                fprintf(f, " DCC: offset=%u, size=%"PRIu64", alignment=%"PRIu64"\n",
                        rtex->dcc_offset, rtex->surface.dcc_size,
                        rtex->surface.dcc_alignment);
                for (i = 0; i <= rtex->surface.last_level; i++)
                        fprintf(f, " DCCLevel[%i]: offset=%"PRIu64"\n",
                                i, rtex->surface.level[i].dcc_offset);
        }

        for (i = 0; i <= rtex->surface.last_level; i++)
                fprintf(f, " Level[%i]: offset=%"PRIu64", slice_size=%"PRIu64", "
                        "npix_x=%u, npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                        "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
                        i, rtex->surface.level[i].offset,
                        rtex->surface.level[i].slice_size,
                        u_minify(rtex->resource.b.b.width0, i),
                        u_minify(rtex->resource.b.b.height0, i),
                        u_minify(rtex->resource.b.b.depth0, i),
                        rtex->surface.level[i].nblk_x,
                        rtex->surface.level[i].nblk_y,
                        rtex->surface.level[i].nblk_z,
                        rtex->surface.level[i].pitch_bytes,
                        rtex->surface.level[i].mode);

        if (rtex->surface.flags & RADEON_SURF_SBUFFER) {
                for (i = 0; i <= rtex->surface.last_level; i++) {
                        fprintf(f, " StencilLayout: tilesplit=%u\n",
                                rtex->surface.stencil_tile_split);
                        fprintf(f, " StencilLevel[%i]: offset=%"PRIu64", "
                                "slice_size=%"PRIu64", npix_x=%u, "
                                "npix_y=%u, npix_z=%u, nblk_x=%u, nblk_y=%u, "
                                "nblk_z=%u, pitch_bytes=%u, mode=%u\n",
                                i, rtex->surface.stencil_level[i].offset,
                                rtex->surface.stencil_level[i].slice_size,
                                u_minify(rtex->resource.b.b.width0, i),
                                u_minify(rtex->resource.b.b.height0, i),
                                u_minify(rtex->resource.b.b.depth0, i),
                                rtex->surface.stencil_level[i].nblk_x,
                                rtex->surface.stencil_level[i].nblk_y,
                                rtex->surface.stencil_level[i].nblk_z,
                                rtex->surface.stencil_level[i].pitch_bytes,
                                rtex->surface.stencil_level[i].mode);
                }
        }
}

/* Common processing for r600_texture_create and r600_texture_from_handle */
static struct r600_texture *
r600_texture_create_object(struct pipe_screen *screen,
                           const struct pipe_resource *base,
                           unsigned pitch_in_bytes_override,
                           struct pb_buffer *buf,
                           struct radeon_surf *surface)
{
        struct r600_texture *rtex;
        struct r600_resource *resource;
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;

        rtex = CALLOC_STRUCT(r600_texture);
        if (!rtex)
                return NULL;

        resource = &rtex->resource;
        resource->b.b = *base;
        resource->b.vtbl = &r600_texture_vtbl;
        pipe_reference_init(&resource->b.b.reference, 1);
        resource->b.b.screen = screen;

        /* don't include stencil-only formats which we don't support for rendering */
        rtex->is_depth = util_format_has_depth(util_format_description(rtex->resource.b.b.format));

        rtex->surface = *surface;
        if (r600_setup_surface(screen, rtex, pitch_in_bytes_override)) {
                FREE(rtex);
                return NULL;
        }

        /* Tiled depth textures utilize the non-displayable tile order.
         * This must be done after r600_setup_surface.
         * Applies to R600-Cayman. */
        rtex->non_disp_tiling = rtex->is_depth && rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;

        if (rtex->is_depth) {
                if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
                                     R600_RESOURCE_FLAG_FLUSHED_DEPTH)) &&
                    !(rscreen->debug_flags & DBG_NO_HYPERZ)) {

                        r600_texture_allocate_htile(rscreen, rtex);
                }
        } else {
                if (base->nr_samples > 1) {
                        if (!buf) {
                                r600_texture_allocate_fmask(rscreen, rtex);
                                r600_texture_allocate_cmask(rscreen, rtex);
                                rtex->cmask_buffer = &rtex->resource;
                        }
                        if (!rtex->fmask.size || !rtex->cmask.size) {
                                FREE(rtex);
                                return NULL;
                        }
                }

                if (!buf && rtex->surface.dcc_size &&
                    !(rscreen->debug_flags & DBG_NO_DCC)) {
                        /* Reserve space for the DCC buffer. */
                        rtex->dcc_offset = align(rtex->size, rtex->surface.dcc_alignment);
                        rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
                        rtex->cb_color_info |= VI_S_028C70_DCC_ENABLE(1);
                }
        }

        /* Now create the backing buffer. */
        if (!buf) {
                if (!r600_init_resource(rscreen, resource, rtex->size,
                                        rtex->surface.bo_alignment, TRUE)) {
                        FREE(rtex);
                        return NULL;
                }
        } else {
                resource->buf = buf;
                resource->gpu_address = rscreen->ws->buffer_get_virtual_address(resource->buf);
                resource->domains = rscreen->ws->buffer_get_initial_domain(resource->buf);
        }

        if (rtex->cmask.size) {
                /* Initialize the cmask to 0xCC (= compressed state). */
                r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
                                         rtex->cmask.offset, rtex->cmask.size,
                                         0xCCCCCCCC, true);
        }
        if (rtex->dcc_offset) {
                r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
                                         rtex->dcc_offset,
                                         rtex->surface.dcc_size,
                                         0xFFFFFFFF, true);
        }

        /* Initialize the CMASK base register value. */
        rtex->cmask.base_address_reg =
                (rtex->resource.gpu_address + rtex->cmask.offset) >> 8;

        if (rscreen->debug_flags & DBG_VM) {
                fprintf(stderr, "VM start=0x%"PRIX64" end=0x%"PRIX64" | Texture %ix%ix%i, %i levels, %i samples, %s\n",
                        rtex->resource.gpu_address,
                        rtex->resource.gpu_address + rtex->resource.buf->size,
                        base->width0, base->height0, util_max_layer(base, 0)+1, base->last_level+1,
                        base->nr_samples ? base->nr_samples : 1, util_format_short_name(base->format));
        }

        if (rscreen->debug_flags & DBG_TEX) {
                puts("Texture:");
                r600_print_texture_info(rtex, stdout);
        }

        return rtex;
}

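/* Choose between linear, 1D-tiled and 2D-tiled layouts for a new texture
 * based on its template and the debug flags. */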
static unsigned r600_choose_tiling(struct r600_common_screen *rscreen,
                                   const struct pipe_resource *templ)
{
        const struct util_format_description *desc = util_format_description(templ->format);
        bool force_tiling = templ->flags & R600_RESOURCE_FLAG_FORCE_TILING;

        /* MSAA resources must be 2D tiled. */
        if (templ->nr_samples > 1)
                return RADEON_SURF_MODE_2D;

        /* Transfer resources should be linear. */
        if (templ->flags & R600_RESOURCE_FLAG_TRANSFER)
                return RADEON_SURF_MODE_LINEAR_ALIGNED;

        /* r600g: force tiling on TEXTURE_2D and TEXTURE_3D compute resources. */
        if (rscreen->chip_class >= R600 && rscreen->chip_class <= CAYMAN &&
            (templ->bind & PIPE_BIND_COMPUTE_RESOURCE) &&
            (templ->target == PIPE_TEXTURE_2D ||
             templ->target == PIPE_TEXTURE_3D))
                force_tiling = true;

        /* Handle common candidates for the linear mode.
         * Compressed textures must always be tiled. */
        if (!force_tiling && !util_format_is_compressed(templ->format)) {
                /* Not everything can be linear, so we cannot enforce it
                 * for all textures. */
                if ((rscreen->debug_flags & DBG_NO_TILING) &&
                    (!util_format_is_depth_or_stencil(templ->format) ||
                     !(templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH)))
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;

                /* Tiling doesn't work with the 422 (SUBSAMPLED) formats on R600+. */
                if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED)
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;

                /* Cursors are linear on SI.
                 * (XXX double-check, maybe also use RADEON_SURF_SCANOUT) */
                if (rscreen->chip_class >= SI &&
                    (templ->bind & PIPE_BIND_CURSOR))
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;

                if (templ->bind & PIPE_BIND_LINEAR)
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;

                /* Textures with a very small height are recommended to be linear. */
                if (templ->target == PIPE_TEXTURE_1D ||
                    templ->target == PIPE_TEXTURE_1D_ARRAY ||
                    templ->height0 <= 4)
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;

                /* Textures likely to be mapped often. */
                if (templ->usage == PIPE_USAGE_STAGING ||
                    templ->usage == PIPE_USAGE_STREAM)
                        return RADEON_SURF_MODE_LINEAR_ALIGNED;
        }

        /* Make small textures 1D tiled. */
        if (templ->width0 <= 16 || templ->height0 <= 16 ||
            (rscreen->debug_flags & DBG_NO_2D_TILING))
                return RADEON_SURF_MODE_1D;

        /* The allocator will switch to 1D if needed. */
        return RADEON_SURF_MODE_2D;
}

struct pipe_resource *r600_texture_create(struct pipe_screen *screen,
                                          const struct pipe_resource *templ)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct radeon_surf surface = {0};
        int r;

        r = r600_init_surface(rscreen, &surface, templ,
                              r600_choose_tiling(rscreen, templ),
                              templ->flags & R600_RESOURCE_FLAG_FLUSHED_DEPTH);
        if (r) {
                return NULL;
        }
        r = rscreen->ws->surface_best(rscreen->ws, &surface);
        if (r) {
                return NULL;
        }
        return (struct pipe_resource *)r600_texture_create_object(screen, templ,
                                                                  0, NULL, &surface);
}

static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen,
                                                      const struct pipe_resource *templ,
                                                      struct winsys_handle *whandle,
                                                      unsigned usage)
{
        struct r600_common_screen *rscreen = (struct r600_common_screen*)screen;
        struct pb_buffer *buf = NULL;
        unsigned stride = 0;
        unsigned array_mode;
        struct radeon_surf surface;
        int r;
        struct radeon_bo_metadata metadata = {};
        struct r600_texture *rtex;

        /* Support only 2D textures without mipmaps */
        if ((templ->target != PIPE_TEXTURE_2D && templ->target != PIPE_TEXTURE_RECT) ||
            templ->depth0 != 1 || templ->last_level != 0)
                return NULL;

        buf = rscreen->ws->buffer_from_handle(rscreen->ws, whandle, &stride);
        if (!buf)
                return NULL;

        rscreen->ws->buffer_get_metadata(buf, &metadata);

        surface.bankw = metadata.bankw;
        surface.bankh = metadata.bankh;
        surface.tile_split = metadata.tile_split;
        surface.stencil_tile_split = metadata.stencil_tile_split;
        surface.mtilea = metadata.mtilea;

        if (metadata.macrotile == RADEON_LAYOUT_TILED)
                array_mode = RADEON_SURF_MODE_2D;
        else if (metadata.microtile == RADEON_LAYOUT_TILED)
                array_mode = RADEON_SURF_MODE_1D;
        else
                array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;

        r = r600_init_surface(rscreen, &surface, templ, array_mode, false);
        if (r) {
                return NULL;
        }

        if (metadata.scanout)
                surface.flags |= RADEON_SURF_SCANOUT;

        rtex = r600_texture_create_object(screen, templ,
                                          stride, buf, &surface);
        if (!rtex)
                return NULL;

        rtex->resource.is_shared = true;
        rtex->resource.external_usage = usage;
        return &rtex->resource.b.b;
}

bool r600_init_flushed_depth_texture(struct pipe_context *ctx,
                                     struct pipe_resource *texture,
                                     struct r600_texture **staging)
{
        struct r600_texture *rtex = (struct r600_texture*)texture;
        struct pipe_resource resource;
        struct r600_texture **flushed_depth_texture = staging ?
                staging : &rtex->flushed_depth_texture;

        if (!staging && rtex->flushed_depth_texture)
                return true; /* it's ready */

        resource.target = texture->target;
        resource.format = texture->format;
        resource.width0 = texture->width0;
        resource.height0 = texture->height0;
        resource.depth0 = texture->depth0;
        resource.array_size = texture->array_size;
        resource.last_level = texture->last_level;
        resource.nr_samples = texture->nr_samples;
        resource.usage = staging ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
        resource.bind = texture->bind & ~PIPE_BIND_DEPTH_STENCIL;
        resource.flags = texture->flags | R600_RESOURCE_FLAG_FLUSHED_DEPTH;

        if (staging)
                resource.flags |= R600_RESOURCE_FLAG_TRANSFER;

        *flushed_depth_texture = (struct r600_texture *)ctx->screen->resource_create(ctx->screen, &resource);
        if (*flushed_depth_texture == NULL) {
                R600_ERR("failed to create temporary texture to hold flushed depth\n");
                return false;
        }

        (*flushed_depth_texture)->is_flushing_texture = TRUE;
        (*flushed_depth_texture)->non_disp_tiling = false;
        return true;
}

/**
 * Initialize the pipe_resource descriptor to be of the same size as the box,
 * which is supposed to hold a subregion of the texture "orig" at the given
 * mipmap level.
 */
static void r600_init_temp_resource_from_box(struct pipe_resource *res,
                                             struct pipe_resource *orig,
                                             const struct pipe_box *box,
                                             unsigned level, unsigned flags)
{
        memset(res, 0, sizeof(*res));
        res->format = orig->format;
        res->width0 = box->width;
        res->height0 = box->height;
        res->depth0 = 1;
        res->array_size = 1;
        res->usage = flags & R600_RESOURCE_FLAG_TRANSFER ? PIPE_USAGE_STAGING : PIPE_USAGE_DEFAULT;
        res->flags = flags;

        /* We must set the correct texture target and dimensions for a 3D box. */
        if (box->depth > 1 && util_max_layer(orig, level) > 0)
                res->target = orig->target;
        else
                res->target = PIPE_TEXTURE_2D;

        switch (res->target) {
        case PIPE_TEXTURE_1D_ARRAY:
        case PIPE_TEXTURE_2D_ARRAY:
        case PIPE_TEXTURE_CUBE_ARRAY:
                res->array_size = box->depth;
                break;
        case PIPE_TEXTURE_3D:
                res->depth0 = box->depth;
                break;
        default:;
        }
}

static void *r600_texture_transfer_map(struct pipe_context *ctx,
                                       struct pipe_resource *texture,
                                       unsigned level,
                                       unsigned usage,
                                       const struct pipe_box *box,
                                       struct pipe_transfer **ptransfer)
{
        struct r600_common_context *rctx = (struct r600_common_context*)ctx;
        struct r600_texture *rtex = (struct r600_texture*)texture;
        struct r600_transfer *trans;
        boolean use_staging_texture = FALSE;
        struct r600_resource *buf;
        unsigned offset = 0;
        char *map;

        /* We cannot map a tiled texture directly because the data is
         * in a different order, therefore we do detiling using a blit.
         *
         * Also, use a temporary in GTT memory for read transfers, as
         * the CPU is much happier reading out of cached system memory
         * than uncached VRAM.
         */
        if (rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D) {
                use_staging_texture = TRUE;
        } else if ((usage & PIPE_TRANSFER_READ) && !(usage & PIPE_TRANSFER_MAP_DIRECTLY) &&
                   (rtex->resource.domains == RADEON_DOMAIN_VRAM)) {
                /* Untiled buffers in VRAM, which is slow for CPU reads */
                use_staging_texture = TRUE;
        } else if (!(usage & PIPE_TRANSFER_READ) &&
                   (r600_rings_is_buffer_referenced(rctx, rtex->resource.buf, RADEON_USAGE_READWRITE) ||
                    !rctx->ws->buffer_wait(rtex->resource.buf, 0, RADEON_USAGE_READWRITE))) {
                /* Use a staging texture for uploads if the underlying BO is busy. */
                use_staging_texture = TRUE;
        }

        if (texture->flags & R600_RESOURCE_FLAG_TRANSFER) {
                use_staging_texture = FALSE;
        }

        if (use_staging_texture && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
                return NULL;
        }

        trans = CALLOC_STRUCT(r600_transfer);
        if (!trans)
                return NULL;
        trans->transfer.resource = texture;
        trans->transfer.level = level;
        trans->transfer.usage = usage;
        trans->transfer.box = *box;

        if (rtex->is_depth) {
                struct r600_texture *staging_depth;

                if (rtex->resource.b.b.nr_samples > 1) {
                        /* MSAA depth buffers need to be converted to single sample buffers.
                         *
                         * Mapping MSAA depth buffers can occur if ReadPixels is called
                         * with a multisample GLX visual.
                         *
                         * First downsample the depth buffer to a temporary texture,
                         * then decompress the temporary one to staging.
                         *
                         * Only the region being mapped is transferred.
                         */
                        struct pipe_resource resource;

                        r600_init_temp_resource_from_box(&resource, texture, box, level, 0);

                        if (!r600_init_flushed_depth_texture(ctx, &resource, &staging_depth)) {
                                R600_ERR("failed to create temporary texture to hold untiled copy\n");
                                FREE(trans);
                                return NULL;
                        }

                        if (usage & PIPE_TRANSFER_READ) {
                                struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
                                if (!temp) {
                                        R600_ERR("failed to create a temporary depth texture\n");
                                        FREE(trans);
                                        return NULL;
                                }

                                r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
                                rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
                                                            0, 0, 0, box->depth, 0, 0);
                                pipe_resource_reference(&temp, NULL);
                        }
                }
                else {
                        /* XXX: only readback the rectangle which is being mapped? */
                        /* XXX: when discard is true, no need to read back from depth texture */
                        if (!r600_init_flushed_depth_texture(ctx, texture, &staging_depth)) {
                                R600_ERR("failed to create temporary texture to hold untiled copy\n");
                                FREE(trans);
                                return NULL;
                        }

                        rctx->blit_decompress_depth(ctx, rtex, staging_depth,
                                                    level, level,
                                                    box->z, box->z + box->depth - 1,
                                                    0, 0);

                        offset = r600_texture_get_offset(staging_depth, level, box);
                }

                trans->transfer.stride = staging_depth->surface.level[level].pitch_bytes;
                trans->transfer.layer_stride = staging_depth->surface.level[level].slice_size;
                trans->staging = (struct r600_resource*)staging_depth;
        } else if (use_staging_texture) {
                struct pipe_resource resource;
                struct r600_texture *staging;

                r600_init_temp_resource_from_box(&resource, texture, box, level,
                                                 R600_RESOURCE_FLAG_TRANSFER);
                resource.usage = (usage & PIPE_TRANSFER_READ) ?
                        PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;

                /* Create the temporary texture. */
                staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
                if (!staging) {
                        R600_ERR("failed to create temporary texture to hold untiled copy\n");
                        FREE(trans);
                        return NULL;
                }
                trans->staging = &staging->resource;
                trans->transfer.stride = staging->surface.level[0].pitch_bytes;
                trans->transfer.layer_stride = staging->surface.level[0].slice_size;
                if (usage & PIPE_TRANSFER_READ) {
                        r600_copy_to_staging_texture(ctx, trans);
                }
        } else {
                /* the resource is mapped directly */
                trans->transfer.stride = rtex->surface.level[level].pitch_bytes;
                trans->transfer.layer_stride = rtex->surface.level[level].slice_size;
                offset = r600_texture_get_offset(rtex, level, box);
        }

        if (trans->staging) {
                buf = trans->staging;
                if (!rtex->is_depth && !(usage & PIPE_TRANSFER_READ))
                        usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
        } else {
                buf = &rtex->resource;
        }

        if (!(map = r600_buffer_map_sync_with_rings(rctx, buf, usage))) {
                pipe_resource_reference((struct pipe_resource**)&trans->staging, NULL);
                FREE(trans);
                return NULL;
        }

        *ptransfer = &trans->transfer;
        return map + offset;
}

static void r600_texture_transfer_unmap(struct pipe_context *ctx,
                                        struct pipe_transfer* transfer)
{
        struct r600_transfer *rtransfer = (struct r600_transfer*)transfer;
        struct pipe_resource *texture = transfer->resource;
        struct r600_texture *rtex = (struct r600_texture*)texture;

        if ((transfer->usage & PIPE_TRANSFER_WRITE) && rtransfer->staging) {
                if (rtex->is_depth && rtex->resource.b.b.nr_samples <= 1) {
                        ctx->resource_copy_region(ctx, texture, transfer->level,
                                                  transfer->box.x, transfer->box.y, transfer->box.z,
                                                  &rtransfer->staging->b.b, transfer->level,
                                                  &transfer->box);
                } else {
                        r600_copy_from_staging_texture(ctx, rtransfer);
                }
        }

        if (rtransfer->staging)
                pipe_resource_reference((struct pipe_resource**)&rtransfer->staging, NULL);

        FREE(transfer);
}

static const struct u_resource_vtbl r600_texture_vtbl =
{
        NULL,                            /* get_handle */
        r600_texture_destroy,            /* resource_destroy */
        r600_texture_transfer_map,       /* transfer_map */
        u_default_transfer_flush_region, /* transfer_flush_region */
        r600_texture_transfer_unmap,     /* transfer_unmap */
        NULL                             /* transfer_inline_write */
};

struct pipe_surface *r600_create_surface_custom(struct pipe_context *pipe,
                                                struct pipe_resource *texture,
                                                const struct pipe_surface *templ,
                                                unsigned width, unsigned height)
{
        struct r600_surface *surface = CALLOC_STRUCT(r600_surface);

        if (!surface)
                return NULL;

        assert(templ->u.tex.first_layer <= util_max_layer(texture, templ->u.tex.level));
        assert(templ->u.tex.last_layer <= util_max_layer(texture, templ->u.tex.level));

        pipe_reference_init(&surface->base.reference, 1);
        pipe_resource_reference(&surface->base.texture, texture);
        surface->base.context = pipe;
        surface->base.format = templ->format;
        surface->base.width = width;
        surface->base.height = height;
        surface->base.u = templ->u;
        return &surface->base;
}

static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
                                                struct pipe_resource *tex,
                                                const struct pipe_surface *templ)
{
        unsigned level = templ->u.tex.level;
        unsigned width = u_minify(tex->width0, level);
        unsigned height = u_minify(tex->height0, level);

        if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
                const struct util_format_description *tex_desc
                        = util_format_description(tex->format);
                const struct util_format_description *templ_desc
                        = util_format_description(templ->format);

                assert(tex_desc->block.bits == templ_desc->block.bits);

                /* Adjust size of surface if and only if the block width or
                 * height is changed. */
                if (tex_desc->block.width != templ_desc->block.width ||
                    tex_desc->block.height != templ_desc->block.height) {
                        unsigned nblks_x = util_format_get_nblocksx(tex->format, width);
                        unsigned nblks_y = util_format_get_nblocksy(tex->format, height);

                        width = nblks_x * templ_desc->block.width;
                        height = nblks_y * templ_desc->block.height;
                }
        }

        return r600_create_surface_custom(pipe, tex, templ, width, height);
}

static void r600_surface_destroy(struct pipe_context *pipe,
                                 struct pipe_surface *surface)
{
        struct r600_surface *surf = (struct r600_surface*)surface;
        pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_fmask, NULL);
        pipe_resource_reference((struct pipe_resource**)&surf->cb_buffer_cmask, NULL);
        pipe_resource_reference(&surface->texture, NULL);
        FREE(surface);
}

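/* Translate a format's channel ordering into the hardware COLOR_SWAP
 * field; returns ~0U for unsupported layouts. */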
unsigned r600_translate_colorswap(enum pipe_format format)
{
        const struct util_format_description *desc = util_format_description(format);

#define HAS_SWIZZLE(chan,swz) (desc->swizzle[chan] == UTIL_FORMAT_SWIZZLE_##swz)

        if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
                return V_0280A0_SWAP_STD;

        if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
                return ~0U;

        switch (desc->nr_channels) {
        case 1:
                if (HAS_SWIZZLE(0,X))
                        return V_0280A0_SWAP_STD; /* X___ */
                else if (HAS_SWIZZLE(3,X))
                        return V_0280A0_SWAP_ALT_REV; /* ___X */
                break;
        case 2:
                if ((HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,Y)) ||
                    (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(1,NONE)) ||
                    (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,Y)))
                        return V_0280A0_SWAP_STD; /* XY__ */
                else if ((HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,X)) ||
                         (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(1,NONE)) ||
                         (HAS_SWIZZLE(0,NONE) && HAS_SWIZZLE(1,X)))
                        return V_0280A0_SWAP_STD_REV; /* YX__ */
                else if (HAS_SWIZZLE(0,X) && HAS_SWIZZLE(3,Y))
                        return V_0280A0_SWAP_ALT; /* X__Y */
                else if (HAS_SWIZZLE(0,Y) && HAS_SWIZZLE(3,X))
                        return V_0280A0_SWAP_ALT_REV; /* Y__X */
                break;
        case 3:
                if (HAS_SWIZZLE(0,X))
                        return V_0280A0_SWAP_STD; /* XYZ */
                else if (HAS_SWIZZLE(0,Z))
                        return V_0280A0_SWAP_STD_REV; /* ZYX */
                break;
        case 4:
                /* check the middle channels, the 1st and 4th channel can be NONE */
                if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,Z))
                        return V_0280A0_SWAP_STD; /* XYZW */
                else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,Y))
                        return V_0280A0_SWAP_STD_REV; /* WZYX */
                else if (HAS_SWIZZLE(1,Y) && HAS_SWIZZLE(2,X))
                        return V_0280A0_SWAP_ALT; /* ZYXW */
                else if (HAS_SWIZZLE(1,Z) && HAS_SWIZZLE(2,W))
                        return V_0280A0_SWAP_ALT_REV; /* YZWX */
                break;
        }
        return ~0U;
}

static void evergreen_set_clear_color(struct r600_texture *rtex,
                                      enum pipe_format surface_format,
                                      const union pipe_color_union *color)
{
        union util_color uc;

        memset(&uc, 0, sizeof(uc));

        if (util_format_is_pure_uint(surface_format)) {
                util_format_write_4ui(surface_format, color->ui, 0, &uc, 0, 0, 0, 1, 1);
        } else if (util_format_is_pure_sint(surface_format)) {
                util_format_write_4i(surface_format, color->i, 0, &uc, 0, 0, 0, 1, 1);
        } else {
                util_pack_color(color->f, surface_format, &uc);
        }

        memcpy(rtex->color_clear_value, &uc, 2 * sizeof(uint32_t));
}

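/* Compute the DCC clear value for a fast clear and report whether a fast
 * clear eliminate pass is still needed afterwards. Only clear colors whose
 * channels are all 0/1 (or 0/max for integer formats) can skip it. */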
static void vi_get_fast_clear_parameters(enum pipe_format surface_format,
                                         const union pipe_color_union *color,
                                         uint32_t* reset_value,
                                         bool* clear_words_needed)
{
        bool values[4] = {};
        int i;
        bool main_value = false;
        bool extra_value = false;
        int extra_channel;
        const struct util_format_description *desc = util_format_description(surface_format);

        *clear_words_needed = true;
        *reset_value = 0x20202020U;

        /* If we want to clear without needing a fast clear eliminate step, we
         * can set each channel to 0 or 1 (or 0/max for integer formats). We
         * have two sets of flags, one for the last or first channel (extra)
         * and one for the other channels (main).
         */

        if (surface_format == PIPE_FORMAT_R11G11B10_FLOAT ||
            surface_format == PIPE_FORMAT_B5G6R5_UNORM ||
            surface_format == PIPE_FORMAT_B5G6R5_SRGB) {
                extra_channel = -1;
        } else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
                if (r600_translate_colorswap(surface_format) <= 1)
                        extra_channel = desc->nr_channels - 1;
                else
                        extra_channel = 0;
        } else
                return;

        for (i = 0; i < 4; ++i) {
                int index = desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X;

                if (desc->swizzle[i] < UTIL_FORMAT_SWIZZLE_X ||
                    desc->swizzle[i] > UTIL_FORMAT_SWIZZLE_W)
                        continue;

                if (util_format_is_pure_sint(surface_format)) {
                        values[i] = color->i[i] != 0;
                        if (color->i[i] != 0 && color->i[i] != INT32_MAX)
                                return;
                } else if (util_format_is_pure_uint(surface_format)) {
                        values[i] = color->ui[i] != 0U;
                        if (color->ui[i] != 0U && color->ui[i] != UINT32_MAX)
                                return;
                } else {
                        values[i] = color->f[i] != 0.0F;
                        if (color->f[i] != 0.0F && color->f[i] != 1.0F)
                                return;
                }

                if (index == extra_channel)
                        extra_value = values[i];
                else
                        main_value = values[i];
        }

        for (int i = 0; i < 4; ++i)
                if (values[i] != main_value &&
                    desc->swizzle[i] - UTIL_FORMAT_SWIZZLE_X != extra_channel &&
                    desc->swizzle[i] >= UTIL_FORMAT_SWIZZLE_X &&
                    desc->swizzle[i] <= UTIL_FORMAT_SWIZZLE_W)
                        return;

        *clear_words_needed = false;
        if (main_value)
                *reset_value |= 0x80808080U;

        if (extra_value)
                *reset_value |= 0x40404040U;
}

void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
                                   struct pipe_framebuffer_state *fb,
                                   struct r600_atom *fb_state,
                                   unsigned *buffers, unsigned *dirty_cbufs,
                                   const union pipe_color_union *color)
{
        int i;

        /* This function is broken in BE, so just disable this path for now */
#ifdef PIPE_ARCH_BIG_ENDIAN
        return;
#endif

        if (rctx->render_cond)
                return;

        for (i = 0; i < fb->nr_cbufs; i++) {
                struct r600_texture *tex;
                unsigned clear_bit = PIPE_CLEAR_COLOR0 << i;

                if (!fb->cbufs[i])
                        continue;

                /* if this colorbuffer is not being cleared */
                if (!(*buffers & clear_bit))
                        continue;

                tex = (struct r600_texture *)fb->cbufs[i]->texture;

                /* 128-bit formats are unsupported */
                if (util_format_get_blocksizebits(fb->cbufs[i]->format) > 64) {
                        continue;
                }

                /* the clear is allowed if all layers are bound */
                if (fb->cbufs[i]->u.tex.first_layer != 0 ||
                    fb->cbufs[i]->u.tex.last_layer != util_max_layer(&tex->resource.b.b, 0)) {
                        continue;
                }

                /* cannot clear mipmapped textures */
                if (fb->cbufs[i]->texture->last_level != 0) {
                        continue;
                }

                /* only supported on tiled surfaces */
                if (tex->surface.level[0].mode < RADEON_SURF_MODE_1D) {
                        continue;
                }

                /* shared textures can't use fast clear without an explicit flush,
                 * because there is no way to communicate the clear color among
                 * all clients
                 */
                if (tex->resource.is_shared &&
                    !(tex->resource.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
                        continue;

                /* fast color clear with 1D tiling doesn't work on old kernels and CIK */
                if (tex->surface.level[0].mode == RADEON_SURF_MODE_1D &&
                    rctx->chip_class >= CIK &&
                    rctx->screen->info.drm_major == 2 &&
                    rctx->screen->info.drm_minor < 38) {
                        continue;
                }

                if (tex->dcc_offset) {
                        uint32_t reset_value;
                        bool clear_words_needed;

                        if (rctx->screen->debug_flags & DBG_NO_DCC_CLEAR)
                                continue;

                        vi_get_fast_clear_parameters(fb->cbufs[i]->format, color, &reset_value, &clear_words_needed);

                        rctx->clear_buffer(&rctx->b, &tex->resource.b.b,
                                           tex->dcc_offset, tex->surface.dcc_size,
                                           reset_value, true);

                        if (clear_words_needed)
                                tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
                } else {
                        /* Stoney/RB+ doesn't work with CMASK fast clear. */
                        if (rctx->family == CHIP_STONEY)
                                continue;

                        /* ensure CMASK is enabled */
                        r600_texture_alloc_cmask_separate(rctx->screen, tex);
                        if (tex->cmask.size == 0) {
                                continue;
                        }

                        /* Do the fast clear. */
                        rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
                                           tex->cmask.offset, tex->cmask.size, 0, true);

                        tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
                }

                evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);

                if (dirty_cbufs)
                        *dirty_cbufs |= 1 << i;
                rctx->set_atom_dirty(rctx, fb_state, true);
                *buffers &= ~clear_bit;
        }
}

void r600_init_screen_texture_functions(struct r600_common_screen *rscreen)
{
        rscreen->b.resource_from_handle = r600_texture_from_handle;
        rscreen->b.resource_get_handle = r600_texture_get_handle;
}

void r600_init_context_texture_functions(struct r600_common_context *rctx)
{
        rctx->b.create_surface = r600_create_surface;
        rctx->b.surface_destroy = r600_surface_destroy;
}