ilo: add ilo_dev_info shared by the screen and contexts
[mesa.git] / src / gallium / drivers / ilo / ilo_resource.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_transfer.h"
29
30 #include "ilo_cp.h"
31 #include "ilo_context.h"
32 #include "ilo_screen.h"
33 #include "ilo_resource.h"
34
35 static struct intel_bo *
36 alloc_buf_bo(const struct ilo_resource *res)
37 {
38 struct ilo_screen *is = ilo_screen(res->base.screen);
39 struct intel_bo *bo;
40 const char *name;
41 const unsigned size = res->bo_width;
42
43 switch (res->base.bind) {
44 case PIPE_BIND_VERTEX_BUFFER:
45 name = "vertex buffer";
46 break;
47 case PIPE_BIND_INDEX_BUFFER:
48 name = "index buffer";
49 break;
50 case PIPE_BIND_CONSTANT_BUFFER:
51 name = "constant buffer";
52 break;
53 case PIPE_BIND_STREAM_OUTPUT:
54 name = "stream output";
55 break;
56 default:
57 name = "unknown buffer";
58 break;
59 }
60
61 /* this is what a buffer supposed to be like */
62 assert(res->bo_width * res->bo_height * res->bo_cpp == size);
63 assert(res->tiling == INTEL_TILING_NONE);
64 assert(res->bo_stride == 0);
65
66 if (res->handle) {
67 bo = is->winsys->import_handle(is->winsys, name,
68 res->bo_width, res->bo_height, res->bo_cpp, res->handle);
69
70 /* since the bo is shared to us, make sure it meets the expectations */
71 if (bo) {
72 assert(bo->get_size(res->bo) == size);
73 assert(bo->get_tiling(res->bo) == res->tiling);
74 assert(bo->get_pitch(res->bo) == res->bo_stride);
75 }
76 }
77 else {
78 bo = is->winsys->alloc_buffer(is->winsys, name, size, 0);
79 }
80
81 return bo;
82 }
83
84 static struct intel_bo *
85 alloc_tex_bo(const struct ilo_resource *res)
86 {
87 struct ilo_screen *is = ilo_screen(res->base.screen);
88 struct intel_bo *bo;
89 const char *name;
90
91 switch (res->base.target) {
92 case PIPE_TEXTURE_1D:
93 name = "1D texture";
94 break;
95 case PIPE_TEXTURE_2D:
96 name = "2D texture";
97 break;
98 case PIPE_TEXTURE_3D:
99 name = "3D texture";
100 break;
101 case PIPE_TEXTURE_CUBE:
102 name = "cube texture";
103 break;
104 case PIPE_TEXTURE_RECT:
105 name = "rectangle texture";
106 break;
107 case PIPE_TEXTURE_1D_ARRAY:
108 name = "1D array texture";
109 break;
110 case PIPE_TEXTURE_2D_ARRAY:
111 name = "2D array texture";
112 break;
113 case PIPE_TEXTURE_CUBE_ARRAY:
114 name = "cube array texture";
115 break;
116 default:
117 name ="unknown texture";
118 break;
119 }
120
121 if (res->handle) {
122 bo = is->winsys->import_handle(is->winsys, name,
123 res->bo_width, res->bo_height, res->bo_cpp, res->handle);
124 }
125 else {
126 const bool for_render =
127 (res->base.bind & (PIPE_BIND_DEPTH_STENCIL |
128 PIPE_BIND_RENDER_TARGET));
129 const unsigned long flags =
130 (for_render) ? INTEL_ALLOC_FOR_RENDER : 0;
131
132 bo = is->winsys->alloc(is->winsys, name,
133 res->bo_width, res->bo_height, res->bo_cpp,
134 res->tiling, flags);
135 }
136
137 return bo;
138 }
139
140 static bool
141 realloc_bo(struct ilo_resource *res)
142 {
143 struct intel_bo *old_bo = res->bo;
144
145 /* a shared bo cannot be reallocated */
146 if (old_bo && res->handle)
147 return false;
148
149 if (res->base.target == PIPE_BUFFER)
150 res->bo = alloc_buf_bo(res);
151 else
152 res->bo = alloc_tex_bo(res);
153
154 if (!res->bo) {
155 res->bo = old_bo;
156 return false;
157 }
158
159 /* winsys may decide to use a different tiling */
160 res->tiling = res->bo->get_tiling(res->bo);
161 res->bo_stride = res->bo->get_pitch(res->bo);
162
163 if (old_bo)
164 old_bo->unreference(old_bo);
165
166 return true;
167 }
168
/**
 * pipe_context::transfer_inline_write for ilo.
 *
 * For synchronized buffer writes, pwrite() the data directly into the bo,
 * trying to avoid a GPU stall first.  Textures and unsynchronized writes
 * fall back to the generic map/memcpy/unmap helper.
 */
static void
ilo_transfer_inline_write(struct pipe_context *pipe,
                          struct pipe_resource *r,
                          unsigned level,
                          unsigned usage,
                          const struct pipe_box *box,
                          const void *data,
                          unsigned stride,
                          unsigned layer_stride)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource *res = ilo_resource(r);
   int offset, size;
   bool will_be_busy;

   /*
    * Fall back to map(), memcpy(), and unmap().  We use this path for
    * unsynchronized write, as the buffer is likely to be busy and pwrite()
    * will stall.
    */
   if (unlikely(res->base.target != PIPE_BUFFER) ||
       (usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      u_default_transfer_inline_write(pipe, r,
            level, usage, box, data, stride, layer_stride);

      return;
   }

   /*
    * XXX With hardware context support, the bo may be needed by GPU without
    * being referenced by ilo->cp->bo.  We have to flush unconditionally, and
    * that is bad.
    */
   if (ilo->cp->hw_ctx)
      ilo_cp_flush(ilo->cp);

   /* true when the not-yet-flushed batch buffer references this bo */
   will_be_busy = ilo->cp->bo->references(ilo->cp->bo, res->bo);

   /* see if we can avoid stalling */
   if (will_be_busy || intel_bo_is_busy(res->bo)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* old data not needed so discard the old bo to avoid stalling */
         if (realloc_bo(res))
            will_stall = false;
      }
      else {
         /*
          * We could allocate a temporary bo to hold the data and emit
          * pipelined copy blit to move them to res->bo.  But for now, do
          * nothing.
          */
      }

      /* flush to make bo busy (so that pwrite() stalls as it should be) */
      if (will_stall && will_be_busy)
         ilo_cp_flush(ilo->cp);
   }

   /* they should specify just an offset and a size */
   assert(level == 0);
   assert(box->y == 0);
   assert(box->z == 0);
   assert(box->height == 1);
   assert(box->depth == 1);
   offset = box->x;
   size = box->width;

   res->bo->pwrite(res->bo, offset, size, data);
}
240
241 static void
242 ilo_transfer_unmap(struct pipe_context *pipe,
243 struct pipe_transfer *transfer)
244 {
245 struct ilo_resource *res = ilo_resource(transfer->resource);
246
247 res->bo->unmap(res->bo);
248
249 pipe_resource_reference(&transfer->resource, NULL);
250 FREE(transfer);
251 }
252
/**
 * pipe_context::transfer_flush_region for ilo.
 *
 * Intentionally a no-op: nothing in this driver needs per-region flushing
 * of a mapped bo (see also the PIPE_TRANSFER_FLUSH_EXPLICIT case in
 * map_resource(), which currently does nothing either).
 */
static void
ilo_transfer_flush_region(struct pipe_context *pipe,
                          struct pipe_transfer *transfer,
                          const struct pipe_box *box)
{
}
259
/**
 * Map res->bo for CPU access according to the transfer usage flags, trying
 * to avoid stalling on the GPU where possible.
 *
 * Returns false when the map fails, or when PIPE_TRANSFER_DONTBLOCK is set
 * and the map would have to stall.
 */
static bool
map_resource(struct ilo_context *ilo, struct ilo_resource *res,
             unsigned usage)
{
   struct ilo_screen *is = ilo_screen(res->base.screen);
   bool will_be_busy;
   int err;

   /* simply map unsynchronized */
   if (usage & PIPE_TRANSFER_UNSYNCHRONIZED) {
      err = res->bo->map_unsynchronized(res->bo);
      return !err;
   }

   /*
    * XXX With hardware context support, the bo may be needed by GPU without
    * being referenced by ilo->cp->bo.  We have to flush unconditionally, and
    * that is bad.
    */
   if (ilo->cp->hw_ctx)
      ilo_cp_flush(ilo->cp);

   /* true when the not-yet-flushed batch buffer references this bo */
   will_be_busy = ilo->cp->bo->references(ilo->cp->bo, res->bo);

   /* see if we can avoid stalling */
   if (will_be_busy || intel_bo_is_busy(res->bo)) {
      bool will_stall = true;

      if (usage & PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE) {
         /* discard old bo and allocate a new one for mapping */
         if (realloc_bo(res))
            will_stall = false;
      }
      else if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
         /* nothing we can do */
      }
      else if (usage & PIPE_TRANSFER_FLUSH_EXPLICIT) {
         /*
          * We could allocate and return a system buffer here.  When a region
          * of the buffer is explicitly flushed, we pwrite() the region to a
          * temporary bo and emit pipelined copy blit.
          *
          * For now, do nothing.
          */
      }
      else if (usage & PIPE_TRANSFER_DISCARD_RANGE) {
         /*
          * We could allocate a temporary bo for mapping, and emit pipelined
          * copy blit upon unmapping.
          *
          * For now, do nothing.
          */
      }

      if (will_stall) {
         if (usage & PIPE_TRANSFER_DONTBLOCK)
            return false;

         /* flush to make bo busy (so that map() stalls as it should be) */
         if (will_be_busy)
            ilo_cp_flush(ilo->cp);
      }
   }

   /* prefer map() when there is the last-level cache */
   if (res->tiling == INTEL_TILING_NONE &&
       (is->dev.has_llc || (usage & PIPE_TRANSFER_READ)))
      err = res->bo->map(res->bo, (usage & PIPE_TRANSFER_WRITE));
   else
      err = res->bo->map_gtt(res->bo);

   return !err;
}
333
/**
 * pipe_context::transfer_map for ilo.
 *
 * Maps the resource and returns a pointer to the first block of the
 * requested box; allocates and fills *transfer.  Returns NULL (with no
 * transfer allocated) on failure.
 */
static void *
ilo_transfer_map(struct pipe_context *pipe,
                 struct pipe_resource *r,
                 unsigned level,
                 unsigned usage,
                 const struct pipe_box *box,
                 struct pipe_transfer **transfer)
{
   struct ilo_context *ilo = ilo_context(pipe);
   struct ilo_resource *res = ilo_resource(r);
   struct pipe_transfer *xfer;
   void *ptr;
   int x, y;

   xfer = MALLOC_STRUCT(pipe_transfer);
   if (!xfer)
      return NULL;

   if (!map_resource(ilo, res, usage)) {
      FREE(xfer);
      return NULL;
   }

   /* init transfer */
   xfer->resource = NULL;
   pipe_resource_reference(&xfer->resource, &res->base);
   xfer->level = level;
   xfer->usage = usage;
   xfer->box = *box;
   /* stride for a block row, not a texel row */
   xfer->stride = res->bo_stride;

   /*
    * we can walk through layers when the resource is a texture array or
    * when this is the first level of a 3D texture being mapped
    */
   if (res->base.array_size > 1 ||
       (res->base.target == PIPE_TEXTURE_3D && level == 0)) {
      /* qpitch in texel rows: Y distance between slice 0 and slice 1 */
      const unsigned qpitch =
         res->slice_offsets[level][1].y - res->slice_offsets[level][0].y;

      assert(qpitch % res->block_height == 0);
      xfer->layer_stride = (qpitch / res->block_height) * xfer->stride;
   }
   else {
      xfer->layer_stride = 0;
   }

   /* texel position of the slice origin, plus the offset into the slice */
   x = res->slice_offsets[level][box->z].x;
   y = res->slice_offsets[level][box->z].y;

   x += box->x;
   y += box->y;

   /* in blocks */
   assert(x % res->block_width == 0 && y % res->block_height == 0);
   x /= res->block_width;
   y /= res->block_height;

   ptr = res->bo->get_virtual(res->bo);
   ptr += y * res->bo_stride + x * res->bo_cpp;

   *transfer = xfer;

   return ptr;
}
400
401 static bool
402 alloc_slice_offsets(struct ilo_resource *res)
403 {
404 int depth, lv;
405
406 /* sum the depths of all levels */
407 depth = 0;
408 for (lv = 0; lv <= res->base.last_level; lv++)
409 depth += u_minify(res->base.depth0, lv);
410
411 /*
412 * There are (depth * res->base.array_size) slices. Either depth is one
413 * (non-3D) or res->base.array_size is one (non-array), but it does not
414 * matter.
415 */
416 res->slice_offsets[0] =
417 CALLOC(depth * res->base.array_size, sizeof(res->slice_offsets[0][0]));
418 if (!res->slice_offsets[0])
419 return false;
420
421 /* point to the respective positions in the buffer */
422 for (lv = 1; lv <= res->base.last_level; lv++) {
423 res->slice_offsets[lv] = res->slice_offsets[lv - 1] +
424 u_minify(res->base.depth0, lv - 1) * res->base.array_size;
425 }
426
427 return true;
428 }
429
430 static void
431 free_slice_offsets(struct ilo_resource *res)
432 {
433 int lv;
434
435 FREE(res->slice_offsets[0]);
436 for (lv = 0; lv <= res->base.last_level; lv++)
437 res->slice_offsets[lv] = NULL;
438 }
439
/**
 * Scratch data computed by layout_tex_init() and consumed by the
 * layout_tex_2d()/layout_tex_3d() functions.
 */
struct layout_tex_info {
   bool compressed;                /* format is a compressed format */
   int block_width, block_height;  /* format block size, in texels */
   int align_i, align_j;           /* surface horizontal/vertical alignment */
   int qpitch;                     /* Y distance between slices, in texel rows */

   /* padded size of each mip level, in texels */
   struct {
      int w, h, d;
   } sizes[PIPE_MAX_TEXTURE_LEVELS];
};
450
/**
 * Prepare for texture layout.
 *
 * Fills \p info with the compression block size, the horizontal/vertical
 * alignment units (align_i/align_j), the padded size of every mip level,
 * and qpitch (the Y distance between array slices, in texel rows).
 */
static void
layout_tex_init(const struct ilo_resource *res, struct layout_tex_info *info)
{
   struct ilo_screen *is = ilo_screen(res->base.screen);
   const enum intel_tiling_mode tiling = res->tiling;
   const struct pipe_resource *templ = &res->base;
   int last_level, lv;

   memset(info, 0, sizeof(*info));

   info->compressed = util_format_is_compressed(templ->format);
   info->block_width = util_format_get_blockwidth(templ->format);
   info->block_height = util_format_get_blockheight(templ->format);

   /*
    * From the Sandy Bridge PRM, volume 1 part 1, page 113:
    *
    *     "surface format         align_i   align_j
    *      YUV 4:2:2 formats      4         *see below
    *      BC1-5                  4         4
    *      FXT1                   8         4
    *      all other formats      4         *see below"
    *
    *     "- align_j = 4 for any depth buffer
    *      - align_j = 2 for separate stencil buffer
    *      - align_j = 4 for any render target surface is multisampled (4x)
    *      - align_j = 4 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_4
    *      - align_j = 2 for any render target surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 2 for all other render target surface
    *      - align_j = 2 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_2
    *      - align_j = 4 for any sampling engine surface with Surface Vertical
    *        Alignment = VALIGN_4"
    *
    * From the Sandy Bridge PRM, volume 4 part 1, page 86:
    *
    *     "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
    *      the Surface Format is 96 bits per element (BPE)."
    *
    * They can be rephrased as
    *
    *                                 align_i      align_j
    *   compressed formats            block width  block height
    *   PIPE_FORMAT_S8_UINT           4            2
    *   other depth/stencil formats   4            4
    *   4x multisampled               4            4
    *   bpp 96                        4            2
    *   others                        4            2 or 4
    */

   /*
    * From the Ivy Bridge PRM, volume 1 part 1, page 110:
    *
    *     "surface defined by      surface format    align_i  align_j
    *      3DSTATE_DEPTH_BUFFER    D16_UNORM         8        4
    *                              not D16_UNORM     4        4
    *      3DSTATE_STENCIL_BUFFER  N/A               8        8
    *      SURFACE_STATE           BC*, ETC*, EAC*   4        4
    *                              FXT1              8        4
    *                              all others        (set by SURFACE_STATE)"
    *
    * From the Ivy Bridge PRM, volume 4 part 1, page 63:
    *
    *     "- This field (Surface Vertical Aligment) is intended to be set to
    *        VALIGN_4 if the surface was rendered as a depth buffer, for a
    *        multisampled (4x) render target, or for a multisampled (8x)
    *        render target, since these surfaces support only alignment of 4.
    *      - Use of VALIGN_4 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to VALIGN_4 for all tiled Y Render Target
    *        surfaces.
    *      - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
    *        YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
    *      - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
    *        must be set to VALIGN_4."
    *      - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
    *
    *     "- This field (Surface Horizontal Aligment) is intended to be set to
    *        HALIGN_8 only if the surface was rendered as a depth buffer with
    *        Z16 format or a stencil buffer, since these surfaces support only
    *        alignment of 8.
    *      - Use of HALIGN_8 for other surfaces is supported, but uses more
    *        memory.
    *      - This field must be set to HALIGN_4 if the Surface Format is BC*.
    *      - This field must be set to HALIGN_8 if the Surface Format is
    *        FXT1."
    *
    * They can be rephrased as
    *
    *                                 align_i      align_j
    *   compressed formats            block width  block height
    *   PIPE_FORMAT_Z16_UNORM         8            4
    *   PIPE_FORMAT_S8_UINT           8            8
    *   other depth/stencil formats   4 or 8       4
    *   2x or 4x multisampled         4 or 8       4
    *   tiled Y                       4 or 8       4 (if rt)
    *   PIPE_FORMAT_R32G32B32_FLOAT   4 or 8       2
    *   others                        4 or 8       2 or 4
    */

   if (info->compressed) {
      /* this happens to be the case */
      info->align_i = info->block_width;
      info->align_j = info->block_height;
   }
   else if (util_format_is_depth_or_stencil(templ->format)) {
      if (is->dev.gen >= ILO_GEN(7)) {
         switch (templ->format) {
         case PIPE_FORMAT_Z16_UNORM:
            info->align_i = 8;
            info->align_j = 4;
            break;
         case PIPE_FORMAT_S8_UINT:
            info->align_i = 8;
            info->align_j = 8;
            break;
         default:
            /*
             * From the Ivy Bridge PRM, volume 2 part 1, page 319:
             *
             *     "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
             *      Depth Coordinate Offset X) must be zero to ensure correct
             *      alignment"
             *
             * We will make use of them and setting align_i to 8 help us meet
             * the requirement.
             */
            info->align_i = (templ->last_level > 0) ? 8 : 4;
            info->align_j = 4;
            break;
         }
      }
      else {
         switch (templ->format) {
         case PIPE_FORMAT_S8_UINT:
            info->align_i = 4;
            info->align_j = 2;
            break;
         default:
            info->align_i = 4;
            info->align_j = 4;
            break;
         }
      }
   }
   else {
      /* see the GEN7 rules quoted above for when VALIGN_4 is required */
      const bool valign_4 = (templ->nr_samples > 1) ||
         (is->dev.gen >= ILO_GEN(7) &&
          (templ->bind & PIPE_BIND_RENDER_TARGET) &&
          tiling == INTEL_TILING_Y);

      if (valign_4)
         assert(util_format_get_blocksizebits(templ->format) != 96);

      info->align_i = 4;
      info->align_j = (valign_4) ? 4 : 2;
   }

   /*
    * the fact that align i and j are multiples of block width and height
    * respectively is what makes the size of the bo a multiple of the block
    * size, slices start at block boundaries, and many of the computations
    * work.
    */
   assert(info->align_i % info->block_width == 0);
   assert(info->align_j % info->block_height == 0);

   /* make sure align() works */
   assert(util_is_power_of_two(info->align_i) &&
          util_is_power_of_two(info->align_j));
   assert(util_is_power_of_two(info->block_width) &&
          util_is_power_of_two(info->block_height));

   last_level = templ->last_level;
   /* need at least 2 levels to compute qpitch below */
   if (templ->array_size > 1 && last_level == 0 &&
       templ->format != PIPE_FORMAT_S8_UINT)
      last_level++;

   /* compute mip level sizes */
   for (lv = 0; lv <= last_level; lv++) {
      int w, h, d;

      w = u_minify(templ->width0, lv);
      h = u_minify(templ->height0, lv);
      d = u_minify(templ->depth0, lv);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 114:
       *
       *     "The dimensions of the mip maps are first determined by applying
       *      the sizing algorithm presented in Non-Power-of-Two Mipmaps
       *      above.  Then, if necessary, they are padded out to compression
       *      block boundaries."
       */
      w = align(w, info->block_width);
      h = align(h, info->block_height);

      /*
       * From the Sandy Bridge PRM, volume 1 part 1, page 111:
       *
       *     "If the surface is multisampled (4x), these values must be
       *      adjusted as follows before proceeding:
       *
       *        W_L = ceiling(W_L / 2) * 4
       *        H_L = ceiling(H_L / 2) * 4"
       */
      if (templ->nr_samples > 1) {
         w = align(w, 2) * 2;
         h = align(h, 2) * 2;
      }

      info->sizes[lv].w = w;
      info->sizes[lv].h = h;
      info->sizes[lv].d = d;
   }

   if (templ->array_size > 1) {
      const int h0 = align(info->sizes[0].h, info->align_j);

      if (templ->format == PIPE_FORMAT_S8_UINT) {
         info->qpitch = h0;
      }
      else {
         const int h1 = align(info->sizes[1].h, info->align_j);

         /*
          * From the Sandy Bridge PRM, volume 1 part 1, page 115:
          *
          *     "The following equation is used for surface formats other than
          *      compressed textures:
          *
          *        QPitch = (h0 + h1 + 11j)"
          *
          *     "The equation for compressed textures (BC* and FXT1 surface
          *      formats) follows:
          *
          *        QPitch = (h0 + h1 + 11j) / 4"
          *
          *     "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than
          *      the value calculated in the equation above, for every other
          *      odd Surface Height starting from 1 i.e. 1,5,9,13"
          *
          * To access the N-th slice, an offset of (Stride * QPitch * N) is
          * added to the base address.  The PRM divides QPitch by 4 for
          * compressed formats because the block height for those formats are
          * 4, and it wants QPitch to mean the number of memory rows, as
          * opposed to texel rows, between slices.  Since we use texel rows in
          * res->slice_offsets, we do not need to divide QPitch by 4.
          */
         info->qpitch = h0 + h1 +
            ((is->dev.gen >= ILO_GEN(7)) ? 12 : 11) * info->align_j;

         /* the DevSNB sampler MSAA qpitch errata quoted above */
         if (is->dev.gen == ILO_GEN(6) && templ->nr_samples > 1 &&
             templ->height0 % 4 == 1)
            info->qpitch += 4;
      }
   }
}
715
/**
 * Layout a 2D texture.
 *
 * Fills res->slice_offsets with the (x, y) texel position of every slice of
 * every mip level, and sets res->bo_width/res->bo_height (in texels) to the
 * bounding size of the whole mipmap chain.
 */
static void
layout_tex_2d(struct ilo_resource *res, const struct layout_tex_info *info)
{
   const struct pipe_resource *templ = &res->base;
   unsigned int level_x, level_y;
   int lv;

   res->bo_width = 0;
   res->bo_height = 0;

   /* (level_x, level_y) is the origin of the current mip level */
   level_x = 0;
   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = info->sizes[lv].w;
      const unsigned int level_h = info->sizes[lv].h;
      int slice;

      for (slice = 0; slice < templ->array_size; slice++) {
         res->slice_offsets[lv][slice].x = level_x;
         /* slices are qpitch apart in Y-direction */
         res->slice_offsets[lv][slice].y = level_y + info->qpitch * slice;
      }

      /* extend the size of the monolithic bo to cover this mip level */
      if (res->bo_width < level_x + level_w)
         res->bo_width = level_x + level_w;
      if (res->bo_height < level_y + level_h)
         res->bo_height = level_y + level_h;

      /*
       * MIPLAYOUT_BELOW: level 1 sits below level 0; levels 2 and up stack
       * downward to the right of level 1, so only level 1 advances X.
       */
      if (lv == 1)
         level_x += align(level_w, info->align_i);
      else
         level_y += align(level_h, info->align_j);
   }

   /* we did not take slices into consideration in the computation above */
   res->bo_height += info->qpitch * (templ->array_size - 1);
}
758
/**
 * Layout a 3D texture.
 *
 * Mip levels are stacked in the Y direction.  Within a level, the slices
 * form a 2D grid with (1 << lv) slices per row.  Fills res->slice_offsets
 * and sets res->bo_width/res->bo_height (in texels).
 */
static void
layout_tex_3d(struct ilo_resource *res, const struct layout_tex_info *info)
{
   const struct pipe_resource *templ = &res->base;
   unsigned int level_y;
   int lv;

   res->bo_width = 0;
   res->bo_height = 0;

   level_y = 0;
   for (lv = 0; lv <= templ->last_level; lv++) {
      const unsigned int level_w = info->sizes[lv].w;
      const unsigned int level_h = info->sizes[lv].h;
      const unsigned int level_d = info->sizes[lv].d;
      /* aligned slice dimensions within the slice grid */
      const unsigned int slice_pitch = align(level_w, info->align_i);
      const unsigned int slice_qpitch = align(level_h, info->align_j);
      const unsigned int num_slices_per_row = 1 << lv;
      int slice;

      for (slice = 0; slice < level_d; slice += num_slices_per_row) {
         int i;

         for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
            res->slice_offsets[lv][slice + i].x = slice_pitch * i;
            res->slice_offsets[lv][slice + i].y = level_y;
         }

         /* move on to the next slice row */
         level_y += slice_qpitch;
      }

      /* rightmost slice */
      slice = MIN2(num_slices_per_row, level_d) - 1;

      /* extend the size of the monolithic bo to cover this slice */
      if (res->bo_width < slice_pitch * slice + level_w)
         res->bo_width = slice_pitch * slice + level_w;
      /* level_y already advanced past the last row; subtract one row back */
      if (lv == templ->last_level)
         res->bo_height = (level_y - slice_qpitch) + level_h;
   }
}
804
805 /**
806 * Guess the texture size. For large textures, the errors are relative small.
807 */
808 static size_t
809 guess_tex_size(const struct pipe_resource *templ,
810 enum intel_tiling_mode tiling)
811 {
812 int bo_width, bo_height, bo_stride;
813
814 /* HALIGN_8 and VALIGN_4 */
815 bo_width = align(templ->width0, 8);
816 bo_height = align(templ->height0, 4);
817
818 if (templ->target == PIPE_TEXTURE_3D) {
819 const int num_rows = util_next_power_of_two(templ->depth0);
820 int lv, sum;
821
822 sum = bo_height * templ->depth0;
823 for (lv = 1; lv <= templ->last_level; lv++)
824 sum += u_minify(bo_height, lv) * u_minify(num_rows, lv);
825
826 bo_height = sum;
827 }
828 else if (templ->last_level > 0) {
829 /* MIPLAYOUT_BELOW, ignore qpich */
830 bo_height = (bo_height + u_minify(bo_height, 1)) * templ->array_size;
831 }
832
833 bo_stride = util_format_get_stride(templ->format, bo_width);
834
835 switch (tiling) {
836 case INTEL_TILING_X:
837 bo_stride = align(bo_stride, 512);
838 bo_height = align(bo_height, 8);
839 break;
840 case INTEL_TILING_Y:
841 bo_stride = align(bo_stride, 128);
842 bo_height = align(bo_height, 32);
843 break;
844 default:
845 bo_height = align(bo_height, 2);
846 break;
847 }
848
849 return util_format_get_2d_size(templ->format, bo_stride, bo_height);
850 }
851
/**
 * Choose the tiling mode for a texture.
 *
 * Applies the hardware restrictions for scanout, cursor, and depth/stencil
 * surfaces first; otherwise picks a tiling heuristically for render targets
 * and sampler views, falling back to linear for everything else.
 */
static enum intel_tiling_mode
get_tex_tiling(const struct ilo_resource *res)
{
   const struct pipe_resource *templ = &res->base;

   /*
    * From the Sandy Bridge PRM, volume 1 part 2, page 32:
    *
    *     "Display/Overlay   Y-Major not supported.
    *                        X-Major required for Async Flips"
    */
   if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
      return INTEL_TILING_X;

   /*
    * From the Sandy Bridge PRM, volume 3 part 2, page 158:
    *
    *     "The cursor surface address must be 4K byte aligned.  The cursor must
    *      be in linear memory, it cannot be tiled."
    */
   if (unlikely(templ->bind & PIPE_BIND_CURSOR))
      return INTEL_TILING_NONE;

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 318:
    *
    *     "[DevSNB+]: This field (Tiled Surface) must be set to TRUE.  Linear
    *      Depth Buffer is not supported."
    *
    *     "The Depth Buffer, if tiled, must use Y-Major tiling."
    */
   if (templ->bind & PIPE_BIND_DEPTH_STENCIL)
      return INTEL_TILING_Y;

   if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
      enum intel_tiling_mode tiling = INTEL_TILING_NONE;

      /*
       * From the Sandy Bridge PRM, volume 1 part 2, page 32:
       *
       *     "NOTE: 128BPE Format Color buffer ( render target ) MUST be
       *      either TileX or Linear."
       *
       * Also, heuristically set a minimum width/height for enabling tiling.
       */
      if (util_format_get_blocksizebits(templ->format) == 128 &&
          (templ->bind & PIPE_BIND_RENDER_TARGET) && templ->width0 >= 64)
         tiling = INTEL_TILING_X;
      else if ((templ->width0 >= 32 && templ->height0 >= 16) ||
               (templ->width0 >= 16 && templ->height0 >= 32))
         tiling = INTEL_TILING_Y;

      /* make sure the bo can be mapped through GTT if tiled */
      if (tiling != INTEL_TILING_NONE) {
         /*
          * Usually only the first 256MB of the GTT is mappable.
          *
          * See also how intel_context::max_gtt_map_object_size is calculated.
          */
         const size_t mappable_gtt_size = 256 * 1024 * 1024;
         const size_t size = guess_tex_size(templ, tiling);

         /* be conservative */
         if (size > mappable_gtt_size / 4)
            tiling = INTEL_TILING_NONE;
      }

      return tiling;
   }

   return INTEL_TILING_NONE;
}
924
925 static void
926 init_texture(struct ilo_resource *res)
927 {
928 const enum pipe_format format = res->base.format;
929 struct layout_tex_info info;
930
931 /* determine tiling first as it may affect the layout */
932 res->tiling = get_tex_tiling(res);
933
934 layout_tex_init(res, &info);
935
936 res->compressed = info.compressed;
937 res->block_width = info.block_width;
938 res->block_height = info.block_height;
939 res->halign_8 = (info.align_i == 8);
940 res->valign_4 = (info.align_j == 4);
941
942 switch (res->base.target) {
943 case PIPE_TEXTURE_1D:
944 case PIPE_TEXTURE_2D:
945 case PIPE_TEXTURE_CUBE:
946 case PIPE_TEXTURE_RECT:
947 case PIPE_TEXTURE_1D_ARRAY:
948 case PIPE_TEXTURE_2D_ARRAY:
949 case PIPE_TEXTURE_CUBE_ARRAY:
950 layout_tex_2d(res, &info);
951 break;
952 case PIPE_TEXTURE_3D:
953 layout_tex_3d(res, &info);
954 break;
955 default:
956 assert(!"unknown resource target");
957 break;
958 }
959
960 /* in blocks */
961 assert(res->bo_width % info.block_width == 0);
962 assert(res->bo_height % info.block_height == 0);
963 res->bo_width /= info.block_width;
964 res->bo_height /= info.block_height;
965 res->bo_cpp = util_format_get_blocksize(format);
966 }
967
968 static void
969 init_buffer(struct ilo_resource *res)
970 {
971 res->compressed = false;
972 res->block_width = 1;
973 res->block_height = 1;
974 res->halign_8 = false;
975 res->valign_4 = false;
976
977 res->bo_width = res->base.width0;
978 res->bo_height = 1;
979 res->bo_cpp = 1;
980 res->bo_stride = 0;
981 res->tiling = INTEL_TILING_NONE;
982 }
983
984 static struct pipe_resource *
985 create_resource(struct pipe_screen *screen,
986 const struct pipe_resource *templ,
987 struct winsys_handle *handle)
988 {
989 struct ilo_resource *res;
990
991 res = CALLOC_STRUCT(ilo_resource);
992 if (!res)
993 return NULL;
994
995 res->base = *templ;
996 res->base.screen = screen;
997 pipe_reference_init(&res->base.reference, 1);
998 res->handle = handle;
999
1000 if (!alloc_slice_offsets(res)) {
1001 FREE(res);
1002 return NULL;
1003 }
1004
1005 if (templ->target == PIPE_BUFFER)
1006 init_buffer(res);
1007 else
1008 init_texture(res);
1009
1010 if (!realloc_bo(res)) {
1011 free_slice_offsets(res);
1012 FREE(res);
1013 return NULL;
1014 }
1015
1016 return &res->base;
1017 }
1018
1019 static boolean
1020 ilo_can_create_resource(struct pipe_screen *screen,
1021 const struct pipe_resource *templ)
1022 {
1023 /*
1024 * We do not know if we will fail until we try to allocate the bo.
1025 * So just set a limit on the texture size.
1026 */
1027 const size_t max_size = 1 * 1024 * 1024 * 1024;
1028 const size_t size = guess_tex_size(templ, INTEL_TILING_Y);
1029
1030 return (size <= max_size);
1031 }
1032
1033 static struct pipe_resource *
1034 ilo_resource_create(struct pipe_screen *screen,
1035 const struct pipe_resource *templ)
1036 {
1037 return create_resource(screen, templ, NULL);
1038 }
1039
/**
 * pipe_screen::resource_from_handle for ilo.
 */
static struct pipe_resource *
ilo_resource_from_handle(struct pipe_screen *screen,
                         const struct pipe_resource *templ,
                         struct winsys_handle *handle)
{
   /* wrap a bo shared by another process */
   return create_resource(screen, templ, handle);
}
1047
1048 static boolean
1049 ilo_resource_get_handle(struct pipe_screen *screen,
1050 struct pipe_resource *r,
1051 struct winsys_handle *handle)
1052 {
1053 struct ilo_resource *res = ilo_resource(r);
1054 int err;
1055
1056 err = res->bo->export_handle(res->bo, handle);
1057
1058 return !err;
1059 }
1060
1061 static void
1062 ilo_resource_destroy(struct pipe_screen *screen,
1063 struct pipe_resource *r)
1064 {
1065 struct ilo_resource *res = ilo_resource(r);
1066
1067 free_slice_offsets(res);
1068 res->bo->unreference(res->bo);
1069 FREE(res);
1070 }
1071
1072 /**
1073 * Initialize resource-related functions.
1074 */
1075 void
1076 ilo_init_resource_functions(struct ilo_screen *is)
1077 {
1078 is->base.can_create_resource = ilo_can_create_resource;
1079 is->base.resource_create = ilo_resource_create;
1080 is->base.resource_from_handle = ilo_resource_from_handle;
1081 is->base.resource_get_handle = ilo_resource_get_handle;
1082 is->base.resource_destroy = ilo_resource_destroy;
1083 }
1084
1085 /**
1086 * Initialize transfer-related functions.
1087 */
1088 void
1089 ilo_init_transfer_functions(struct ilo_context *ilo)
1090 {
1091 ilo->base.transfer_map = ilo_transfer_map;
1092 ilo->base.transfer_flush_region = ilo_transfer_flush_region;
1093 ilo->base.transfer_unmap = ilo_transfer_unmap;
1094 ilo->base.transfer_inline_write = ilo_transfer_inline_write;
1095 }
1096
/**
 * Return the offset (in bytes) to a slice within the bo.
 *
 * When tile_aligned is true, the offset is to the tile containing the start
 * address of the slice.  x_offset and y_offset are offsets (in pixels) from
 * the tile start to slice start.  x_offset is always a multiple of 4 and
 * y_offset is always a multiple of 2.
 */
unsigned
ilo_resource_get_slice_offset(const struct ilo_resource *res,
                              int level, int slice, bool tile_aligned,
                              unsigned *x_offset, unsigned *y_offset)
{
   /* slice position in blocks */
   const unsigned x = res->slice_offsets[level][slice].x / res->block_width;
   const unsigned y = res->slice_offsets[level][slice].y / res->block_height;
   unsigned tile_w, tile_h, tile_size, row_size;
   unsigned slice_offset;

   /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */

   switch (res->tiling) {
   case INTEL_TILING_NONE:
      /* model a linear surface as one-block-wide, one-row-tall "tiles" */
      tile_w = res->bo_cpp;
      tile_h = 1;
      break;
   case INTEL_TILING_X:
      /* an X tile is 512 bytes x 8 rows */
      tile_w = 512;
      tile_h = 8;
      break;
   case INTEL_TILING_Y:
      /* a Y tile is 128 bytes x 32 rows */
      tile_w = 128;
      tile_h = 32;
      break;
   default:
      assert(!"unknown tiling");
      tile_w = res->bo_cpp;
      tile_h = 1;
      break;
   }

   tile_size = tile_w * tile_h;
   row_size = res->bo_stride * tile_h;

   /*
    * for non-tiled resources, this is equivalent to
    *
    *   slice_offset = y * res->bo_stride + x * res->bo_cpp;
    */
   slice_offset =
      row_size * (y / tile_h) + tile_size * (x * res->bo_cpp / tile_w);

   /*
    * Since res->bo_stride is a multiple of tile_w, slice_offset should be
    * aligned at this point.
    */
   assert(slice_offset % tile_size == 0);

   if (tile_aligned) {
      /*
       * because of the possible values of align_i and align_j in
       * layout_tex_init(), x_offset must be a multiple of 4 and y_offset must
       * be a multiple of 2.
       */
      if (x_offset) {
         assert(tile_w % res->bo_cpp == 0);
         *x_offset = (x % (tile_w / res->bo_cpp)) * res->block_width;
         assert(*x_offset % 4 == 0);
      }
      if (y_offset) {
         *y_offset = (y % tile_h) * res->block_height;
         assert(*y_offset % 2 == 0);
      }
   }
   else {
      /* byte offset of the slice within its tile */
      const unsigned tx = (x * res->bo_cpp) % tile_w;
      const unsigned ty = y % tile_h;

      switch (res->tiling) {
      case INTEL_TILING_NONE:
         assert(tx == 0 && ty == 0);
         break;
      case INTEL_TILING_X:
         /* an X tile stores rows of tile_w bytes contiguously */
         slice_offset += tile_w * ty + tx;
         break;
      case INTEL_TILING_Y:
         /* a Y tile stores columns of 16-byte OWords contiguously */
         slice_offset += tile_h * 16 * (tx / 16) + ty * 16 + (tx % 16);
         break;
      }

      if (x_offset)
         *x_offset = 0;
      if (y_offset)
         *y_offset = 0;
   }

   return slice_offset;
}