ilo: move transfer-related functions to a new file
[mesa.git] / src / gallium / drivers / ilo / ilo_resource.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_screen.h"
29 #include "ilo_resource.h"
30
/* MCS surfaces are identified by the PIPE_BIND_CUSTOM bind flag */
#define ILO_BIND_MCS PIPE_BIND_CUSTOM
33
34 static struct intel_bo *
35 alloc_buf_bo(const struct ilo_resource *res)
36 {
37 struct ilo_screen *is = ilo_screen(res->base.screen);
38 struct intel_bo *bo;
39 const char *name;
40 const unsigned size = res->bo_width;
41
42 switch (res->base.bind) {
43 case PIPE_BIND_VERTEX_BUFFER:
44 name = "vertex buffer";
45 break;
46 case PIPE_BIND_INDEX_BUFFER:
47 name = "index buffer";
48 break;
49 case PIPE_BIND_CONSTANT_BUFFER:
50 name = "constant buffer";
51 break;
52 case PIPE_BIND_STREAM_OUTPUT:
53 name = "stream output";
54 break;
55 default:
56 name = "unknown buffer";
57 break;
58 }
59
60 /* this is what a buffer supposed to be like */
61 assert(res->bo_width * res->bo_height * res->bo_cpp == size);
62 assert(res->tiling == INTEL_TILING_NONE);
63 assert(res->bo_stride == 0);
64
65 if (res->handle) {
66 bo = is->winsys->import_handle(is->winsys, name,
67 res->bo_width, res->bo_height, res->bo_cpp, res->handle);
68
69 /* since the bo is shared to us, make sure it meets the expectations */
70 if (bo) {
71 assert(bo->get_size(res->bo) == size);
72 assert(bo->get_tiling(res->bo) == res->tiling);
73 assert(bo->get_pitch(res->bo) == res->bo_stride);
74 }
75 }
76 else {
77 bo = is->winsys->alloc_buffer(is->winsys, name, size, 0);
78 }
79
80 return bo;
81 }
82
83 static struct intel_bo *
84 alloc_tex_bo(const struct ilo_resource *res)
85 {
86 struct ilo_screen *is = ilo_screen(res->base.screen);
87 struct intel_bo *bo;
88 const char *name;
89
90 switch (res->base.target) {
91 case PIPE_TEXTURE_1D:
92 name = "1D texture";
93 break;
94 case PIPE_TEXTURE_2D:
95 name = "2D texture";
96 break;
97 case PIPE_TEXTURE_3D:
98 name = "3D texture";
99 break;
100 case PIPE_TEXTURE_CUBE:
101 name = "cube texture";
102 break;
103 case PIPE_TEXTURE_RECT:
104 name = "rectangle texture";
105 break;
106 case PIPE_TEXTURE_1D_ARRAY:
107 name = "1D array texture";
108 break;
109 case PIPE_TEXTURE_2D_ARRAY:
110 name = "2D array texture";
111 break;
112 case PIPE_TEXTURE_CUBE_ARRAY:
113 name = "cube array texture";
114 break;
115 default:
116 name ="unknown texture";
117 break;
118 }
119
120 if (res->handle) {
121 bo = is->winsys->import_handle(is->winsys, name,
122 res->bo_width, res->bo_height, res->bo_cpp, res->handle);
123 }
124 else {
125 const bool for_render =
126 (res->base.bind & (PIPE_BIND_DEPTH_STENCIL |
127 PIPE_BIND_RENDER_TARGET));
128 const unsigned long flags =
129 (for_render) ? INTEL_ALLOC_FOR_RENDER : 0;
130
131 bo = is->winsys->alloc(is->winsys, name,
132 res->bo_width, res->bo_height, res->bo_cpp,
133 res->tiling, flags);
134 }
135
136 return bo;
137 }
138
139 bool
140 ilo_resource_alloc_bo(struct ilo_resource *res)
141 {
142 struct intel_bo *old_bo = res->bo;
143
144 /* a shared bo cannot be reallocated */
145 if (old_bo && res->handle)
146 return false;
147
148 if (res->base.target == PIPE_BUFFER)
149 res->bo = alloc_buf_bo(res);
150 else
151 res->bo = alloc_tex_bo(res);
152
153 if (!res->bo) {
154 res->bo = old_bo;
155 return false;
156 }
157
158 /* winsys may decide to use a different tiling */
159 res->tiling = res->bo->get_tiling(res->bo);
160 res->bo_stride = res->bo->get_pitch(res->bo);
161
162 if (old_bo)
163 old_bo->unreference(old_bo);
164
165 return true;
166 }
167
168 static bool
169 alloc_slice_offsets(struct ilo_resource *res)
170 {
171 int depth, lv;
172
173 /* sum the depths of all levels */
174 depth = 0;
175 for (lv = 0; lv <= res->base.last_level; lv++)
176 depth += u_minify(res->base.depth0, lv);
177
178 /*
179 * There are (depth * res->base.array_size) slices. Either depth is one
180 * (non-3D) or res->base.array_size is one (non-array), but it does not
181 * matter.
182 */
183 res->slice_offsets[0] =
184 CALLOC(depth * res->base.array_size, sizeof(res->slice_offsets[0][0]));
185 if (!res->slice_offsets[0])
186 return false;
187
188 /* point to the respective positions in the buffer */
189 for (lv = 1; lv <= res->base.last_level; lv++) {
190 res->slice_offsets[lv] = res->slice_offsets[lv - 1] +
191 u_minify(res->base.depth0, lv - 1) * res->base.array_size;
192 }
193
194 return true;
195 }
196
197 static void
198 free_slice_offsets(struct ilo_resource *res)
199 {
200 int lv;
201
202 FREE(res->slice_offsets[0]);
203 for (lv = 0; lv <= res->base.last_level; lv++)
204 res->slice_offsets[lv] = NULL;
205 }
206
207 struct layout_tex_info {
208 bool compressed;
209 int block_width, block_height;
210 int align_i, align_j;
211 bool array_spacing_full;
212 bool interleaved;
213 int qpitch;
214
215 struct {
216 int w, h, d;
217 } sizes[PIPE_MAX_TEXTURE_LEVELS];
218 };
219
220 /**
221 * Prepare for texture layout.
222 */
223 static void
224 layout_tex_init(const struct ilo_resource *res, struct layout_tex_info *info)
225 {
226 struct ilo_screen *is = ilo_screen(res->base.screen);
227 const enum pipe_format bo_format = res->bo_format;
228 const enum intel_tiling_mode tiling = res->tiling;
229 const struct pipe_resource *templ = &res->base;
230 int last_level, lv;
231
232 memset(info, 0, sizeof(*info));
233
234 info->compressed = util_format_is_compressed(bo_format);
235 info->block_width = util_format_get_blockwidth(bo_format);
236 info->block_height = util_format_get_blockheight(bo_format);
237
238 /*
239 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
240 *
241 * "surface format align_i align_j
242 * YUV 4:2:2 formats 4 *see below
243 * BC1-5 4 4
244 * FXT1 8 4
245 * all other formats 4 *see below"
246 *
247 * "- align_j = 4 for any depth buffer
248 * - align_j = 2 for separate stencil buffer
249 * - align_j = 4 for any render target surface is multisampled (4x)
250 * - align_j = 4 for any render target surface with Surface Vertical
251 * Alignment = VALIGN_4
252 * - align_j = 2 for any render target surface with Surface Vertical
253 * Alignment = VALIGN_2
254 * - align_j = 2 for all other render target surface
255 * - align_j = 2 for any sampling engine surface with Surface Vertical
256 * Alignment = VALIGN_2
257 * - align_j = 4 for any sampling engine surface with Surface Vertical
258 * Alignment = VALIGN_4"
259 *
260 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
261 *
262 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
263 * the Surface Format is 96 bits per element (BPE)."
264 *
265 * They can be rephrased as
266 *
267 * align_i align_j
268 * compressed formats block width block height
269 * PIPE_FORMAT_S8_UINT 4 2
270 * other depth/stencil formats 4 4
271 * 4x multisampled 4 4
272 * bpp 96 4 2
273 * others 4 2 or 4
274 */
275
276 /*
277 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
278 *
279 * "surface defined by surface format align_i align_j
280 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
281 * not D16_UNORM 4 4
282 * 3DSTATE_STENCIL_BUFFER N/A 8 8
283 * SURFACE_STATE BC*, ETC*, EAC* 4 4
284 * FXT1 8 4
285 * all others (set by SURFACE_STATE)"
286 *
287 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
288 *
289 * "- This field (Surface Vertical Aligment) is intended to be set to
290 * VALIGN_4 if the surface was rendered as a depth buffer, for a
291 * multisampled (4x) render target, or for a multisampled (8x)
292 * render target, since these surfaces support only alignment of 4.
293 * - Use of VALIGN_4 for other surfaces is supported, but uses more
294 * memory.
295 * - This field must be set to VALIGN_4 for all tiled Y Render Target
296 * surfaces.
297 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
298 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
299 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
300 * must be set to VALIGN_4."
301 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
302 *
303 * "- This field (Surface Horizontal Aligment) is intended to be set to
304 * HALIGN_8 only if the surface was rendered as a depth buffer with
305 * Z16 format or a stencil buffer, since these surfaces support only
306 * alignment of 8.
307 * - Use of HALIGN_8 for other surfaces is supported, but uses more
308 * memory.
309 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
310 * - This field must be set to HALIGN_8 if the Surface Format is
311 * FXT1."
312 *
313 * They can be rephrased as
314 *
315 * align_i align_j
316 * compressed formats block width block height
317 * PIPE_FORMAT_Z16_UNORM 8 4
318 * PIPE_FORMAT_S8_UINT 8 8
319 * other depth/stencil formats 4 or 8 4
320 * 2x or 4x multisampled 4 or 8 4
321 * tiled Y 4 or 8 4 (if rt)
322 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
323 * others 4 or 8 2 or 4
324 */
325
326 if (info->compressed) {
327 /* this happens to be the case */
328 info->align_i = info->block_width;
329 info->align_j = info->block_height;
330 }
331 else if (util_format_is_depth_or_stencil(bo_format)) {
332 if (is->dev.gen >= ILO_GEN(7)) {
333 switch (bo_format) {
334 case PIPE_FORMAT_Z16_UNORM:
335 info->align_i = 8;
336 info->align_j = 4;
337 break;
338 case PIPE_FORMAT_S8_UINT:
339 info->align_i = 8;
340 info->align_j = 8;
341 break;
342 default:
343 /*
344 * From the Ivy Bridge PRM, volume 2 part 1, page 319:
345 *
346 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
347 * Depth Coordinate Offset X) must be zero to ensure correct
348 * alignment"
349 *
350 * We will make use of them and setting align_i to 8 help us meet
351 * the requirement.
352 */
353 info->align_i = (templ->last_level > 0) ? 8 : 4;
354 info->align_j = 4;
355 break;
356 }
357 }
358 else {
359 switch (bo_format) {
360 case PIPE_FORMAT_S8_UINT:
361 info->align_i = 4;
362 info->align_j = 2;
363 break;
364 default:
365 info->align_i = 4;
366 info->align_j = 4;
367 break;
368 }
369 }
370 }
371 else {
372 const bool valign_4 = (templ->nr_samples > 1) ||
373 (is->dev.gen >= ILO_GEN(7) &&
374 (templ->bind & PIPE_BIND_RENDER_TARGET) &&
375 tiling == INTEL_TILING_Y);
376
377 if (valign_4)
378 assert(util_format_get_blocksizebits(bo_format) != 96);
379
380 info->align_i = 4;
381 info->align_j = (valign_4) ? 4 : 2;
382 }
383
384 /*
385 * the fact that align i and j are multiples of block width and height
386 * respectively is what makes the size of the bo a multiple of the block
387 * size, slices start at block boundaries, and many of the computations
388 * work.
389 */
390 assert(info->align_i % info->block_width == 0);
391 assert(info->align_j % info->block_height == 0);
392
393 /* make sure align() works */
394 assert(util_is_power_of_two(info->align_i) &&
395 util_is_power_of_two(info->align_j));
396 assert(util_is_power_of_two(info->block_width) &&
397 util_is_power_of_two(info->block_height));
398
399 if (is->dev.gen >= ILO_GEN(7)) {
400 /*
401 * It is not explicitly states, but render targets are expected to be
402 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
403 * expected to be IMS (samples interleaved).
404 *
405 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
406 */
407 if (util_format_is_depth_or_stencil(bo_format)) {
408 info->interleaved = true;
409
410 /*
411 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
412 *
413 * "note that the depth buffer and stencil buffer have an implied
414 * value of ARYSPC_FULL"
415 */
416 info->array_spacing_full = true;
417 }
418 else {
419 info->interleaved = false;
420
421 /*
422 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
423 *
424 * "If Multisampled Surface Storage Format is MSFMT_MSS and
425 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
426 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
427 *
428 * As multisampled resources are not mipmapped, we never use
429 * ARYSPC_FULL for them.
430 */
431 if (templ->nr_samples > 1)
432 assert(templ->last_level == 0);
433 info->array_spacing_full = (templ->last_level > 0);
434 }
435 }
436 else {
437 /* GEN6 supports only interleaved samples */
438 info->interleaved = true;
439
440 /*
441 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
442 *
443 * "The separate stencil buffer does not support mip mapping, thus
444 * the storage for LODs other than LOD 0 is not needed. The
445 * following QPitch equation applies only to the separate stencil
446 * buffer:
447 *
448 * QPitch = h_0"
449 *
450 * GEN6 does not support compact spacing otherwise.
451 */
452 info->array_spacing_full = (bo_format != PIPE_FORMAT_S8_UINT);
453 }
454
455 last_level = templ->last_level;
456
457 /* need at least 2 levels to compute full qpitch */
458 if (last_level == 0 && templ->array_size > 1 && info->array_spacing_full)
459 last_level++;
460
461 /* compute mip level sizes */
462 for (lv = 0; lv <= last_level; lv++) {
463 int w, h, d;
464
465 w = u_minify(templ->width0, lv);
466 h = u_minify(templ->height0, lv);
467 d = u_minify(templ->depth0, lv);
468
469 /*
470 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
471 *
472 * "The dimensions of the mip maps are first determined by applying
473 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
474 * above. Then, if necessary, they are padded out to compression
475 * block boundaries."
476 */
477 w = align(w, info->block_width);
478 h = align(h, info->block_height);
479
480 /*
481 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
482 *
483 * "If the surface is multisampled (4x), these values must be
484 * adjusted as follows before proceeding:
485 *
486 * W_L = ceiling(W_L / 2) * 4
487 * H_L = ceiling(H_L / 2) * 4"
488 *
489 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
490 *
491 * "If the surface is multisampled and it is a depth or stencil
492 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
493 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
494 * before proceeding:
495 *
496 * #samples W_L = H_L =
497 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
498 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
499 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
500 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
501 *
502 * For interleaved samples (4x), where pixels
503 *
504 * (x, y ) (x+1, y )
505 * (x, y+1) (x+1, y+1)
506 *
507 * would be is occupied by
508 *
509 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
510 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
511 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
512 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
513 *
514 * Thus the need to
515 *
516 * w = align(w, 2) * 2;
517 * y = align(y, 2) * 2;
518 */
519 if (info->interleaved) {
520 switch (templ->nr_samples) {
521 case 0:
522 case 1:
523 break;
524 case 2:
525 w = align(w, 2) * 2;
526 break;
527 case 4:
528 w = align(w, 2) * 2;
529 h = align(h, 2) * 2;
530 break;
531 case 8:
532 w = align(w, 2) * 4;
533 h = align(h, 2) * 2;
534 break;
535 case 16:
536 w = align(w, 2) * 4;
537 h = align(h, 2) * 4;
538 break;
539 default:
540 assert(!"unsupported sample count");
541 break;
542 }
543 }
544
545 info->sizes[lv].w = w;
546 info->sizes[lv].h = h;
547 info->sizes[lv].d = d;
548 }
549
550 if (templ->array_size > 1) {
551 const int h0 = align(info->sizes[0].h, info->align_j);
552
553 if (info->array_spacing_full) {
554 const int h1 = align(info->sizes[1].h, info->align_j);
555
556 /*
557 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
558 *
559 * "The following equation is used for surface formats other than
560 * compressed textures:
561 *
562 * QPitch = (h0 + h1 + 11j)"
563 *
564 * "The equation for compressed textures (BC* and FXT1 surface
565 * formats) follows:
566 *
567 * QPitch = (h0 + h1 + 11j) / 4"
568 *
569 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than
570 * the value calculated in the equation above, for every other
571 * odd Surface Height starting from 1 i.e. 1,5,9,13"
572 *
573 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
574 *
575 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the
576 * depth buffer and stencil buffer have an implied value of
577 * ARYSPC_FULL):
578 *
579 * QPitch = (h0 + h1 + 12j)
580 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
581 *
582 * (There are many typos or missing words here...)"
583 *
584 * To access the N-th slice, an offset of (Stride * QPitch * N) is
585 * added to the base address. The PRM divides QPitch by 4 for
586 * compressed formats because the block height for those formats are
587 * 4, and it wants QPitch to mean the number of memory rows, as
588 * opposed to texel rows, between slices. Since we use texel rows in
589 * res->slice_offsets, we do not need to divide QPitch by 4.
590 */
591 info->qpitch = h0 + h1 +
592 ((is->dev.gen >= ILO_GEN(7)) ? 12 : 11) * info->align_j;
593
594 if (is->dev.gen == ILO_GEN(6) && templ->nr_samples > 1 &&
595 templ->height0 % 4 == 1)
596 info->qpitch += 4;
597 }
598 else {
599 info->qpitch = h0;
600 }
601 }
602 }
603
604 /**
605 * Layout a 2D texture.
606 */
607 static void
608 layout_tex_2d(struct ilo_resource *res, const struct layout_tex_info *info)
609 {
610 const struct pipe_resource *templ = &res->base;
611 unsigned int level_x, level_y, num_slices;
612 int lv;
613
614 res->bo_width = 0;
615 res->bo_height = 0;
616
617 level_x = 0;
618 level_y = 0;
619 for (lv = 0; lv <= templ->last_level; lv++) {
620 const unsigned int level_w = info->sizes[lv].w;
621 const unsigned int level_h = info->sizes[lv].h;
622 int slice;
623
624 for (slice = 0; slice < templ->array_size; slice++) {
625 res->slice_offsets[lv][slice].x = level_x;
626 /* slices are qpitch apart in Y-direction */
627 res->slice_offsets[lv][slice].y = level_y + info->qpitch * slice;
628 }
629
630 /* extend the size of the monolithic bo to cover this mip level */
631 if (res->bo_width < level_x + level_w)
632 res->bo_width = level_x + level_w;
633 if (res->bo_height < level_y + level_h)
634 res->bo_height = level_y + level_h;
635
636 /* MIPLAYOUT_BELOW */
637 if (lv == 1)
638 level_x += align(level_w, info->align_i);
639 else
640 level_y += align(level_h, info->align_j);
641 }
642
643 num_slices = templ->array_size;
644 /* samples of the same index are stored in a slice */
645 if (templ->nr_samples > 1 && !info->interleaved)
646 num_slices *= templ->nr_samples;
647
648 /* we did not take slices into consideration in the computation above */
649 res->bo_height += info->qpitch * (num_slices - 1);
650 }
651
652 /**
653 * Layout a 3D texture.
654 */
655 static void
656 layout_tex_3d(struct ilo_resource *res, const struct layout_tex_info *info)
657 {
658 const struct pipe_resource *templ = &res->base;
659 unsigned int level_y;
660 int lv;
661
662 res->bo_width = 0;
663 res->bo_height = 0;
664
665 level_y = 0;
666 for (lv = 0; lv <= templ->last_level; lv++) {
667 const unsigned int level_w = info->sizes[lv].w;
668 const unsigned int level_h = info->sizes[lv].h;
669 const unsigned int level_d = info->sizes[lv].d;
670 const unsigned int slice_pitch = align(level_w, info->align_i);
671 const unsigned int slice_qpitch = align(level_h, info->align_j);
672 const unsigned int num_slices_per_row = 1 << lv;
673 int slice;
674
675 for (slice = 0; slice < level_d; slice += num_slices_per_row) {
676 int i;
677
678 for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
679 res->slice_offsets[lv][slice + i].x = slice_pitch * i;
680 res->slice_offsets[lv][slice + i].y = level_y;
681 }
682
683 /* move on to the next slice row */
684 level_y += slice_qpitch;
685 }
686
687 /* rightmost slice */
688 slice = MIN2(num_slices_per_row, level_d) - 1;
689
690 /* extend the size of the monolithic bo to cover this slice */
691 if (res->bo_width < slice_pitch * slice + level_w)
692 res->bo_width = slice_pitch * slice + level_w;
693 if (lv == templ->last_level)
694 res->bo_height = (level_y - slice_qpitch) + level_h;
695 }
696 }
697
698 /**
699 * Guess the texture size. For large textures, the errors are relative small.
700 */
701 static size_t
702 guess_tex_size(const struct pipe_resource *templ,
703 enum intel_tiling_mode tiling)
704 {
705 int bo_width, bo_height, bo_stride;
706
707 /* HALIGN_8 and VALIGN_4 */
708 bo_width = align(templ->width0, 8);
709 bo_height = align(templ->height0, 4);
710
711 if (templ->target == PIPE_TEXTURE_3D) {
712 const int num_rows = util_next_power_of_two(templ->depth0);
713 int lv, sum;
714
715 sum = bo_height * templ->depth0;
716 for (lv = 1; lv <= templ->last_level; lv++)
717 sum += u_minify(bo_height, lv) * u_minify(num_rows, lv);
718
719 bo_height = sum;
720 }
721 else if (templ->last_level > 0) {
722 /* MIPLAYOUT_BELOW, ignore qpich */
723 bo_height = (bo_height + u_minify(bo_height, 1)) * templ->array_size;
724 }
725
726 bo_stride = util_format_get_stride(templ->format, bo_width);
727
728 switch (tiling) {
729 case INTEL_TILING_X:
730 bo_stride = align(bo_stride, 512);
731 bo_height = align(bo_height, 8);
732 break;
733 case INTEL_TILING_Y:
734 bo_stride = align(bo_stride, 128);
735 bo_height = align(bo_height, 32);
736 break;
737 default:
738 bo_height = align(bo_height, 2);
739 break;
740 }
741
742 return util_format_get_2d_size(templ->format, bo_stride, bo_height);
743 }
744
745 static enum intel_tiling_mode
746 get_tex_tiling(const struct ilo_resource *res)
747 {
748 const struct pipe_resource *templ = &res->base;
749 const enum pipe_format bo_format = res->bo_format;
750
751 /*
752 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
753 *
754 * "Display/Overlay Y-Major not supported.
755 * X-Major required for Async Flips"
756 */
757 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
758 return INTEL_TILING_X;
759
760 /*
761 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
762 *
763 * "The cursor surface address must be 4K byte aligned. The cursor must
764 * be in linear memory, it cannot be tiled."
765 */
766 if (unlikely(templ->bind & PIPE_BIND_CURSOR))
767 return INTEL_TILING_NONE;
768
769 /*
770 * From the Ivy Bridge PRM, volume 4 part 1, page 76:
771 *
772 * "The MCS surface must be stored as Tile Y."
773 */
774 if (templ->bind & ILO_BIND_MCS)
775 return INTEL_TILING_Y;
776
777 /*
778 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
779 *
780 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
781 * Depth Buffer is not supported."
782 *
783 * "The Depth Buffer, if tiled, must use Y-Major tiling."
784 */
785 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
786 /* separate stencil uses W-tiling but we do not know how to specify that */
787 return (bo_format == PIPE_FORMAT_S8_UINT) ?
788 INTEL_TILING_NONE : INTEL_TILING_Y;
789 }
790
791 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
792 enum intel_tiling_mode tiling = INTEL_TILING_NONE;
793
794 /*
795 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
796 *
797 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
798 * either TileX or Linear."
799 *
800 * Also, heuristically set a minimum width/height for enabling tiling.
801 */
802 if (util_format_get_blocksizebits(bo_format) == 128 &&
803 (templ->bind & PIPE_BIND_RENDER_TARGET) && templ->width0 >= 64)
804 tiling = INTEL_TILING_X;
805 else if ((templ->width0 >= 32 && templ->height0 >= 16) ||
806 (templ->width0 >= 16 && templ->height0 >= 32))
807 tiling = INTEL_TILING_Y;
808
809 /* make sure the bo can be mapped through GTT if tiled */
810 if (tiling != INTEL_TILING_NONE) {
811 /*
812 * Usually only the first 256MB of the GTT is mappable.
813 *
814 * See also how intel_context::max_gtt_map_object_size is calculated.
815 */
816 const size_t mappable_gtt_size = 256 * 1024 * 1024;
817 const size_t size = guess_tex_size(templ, tiling);
818
819 /* be conservative */
820 if (size > mappable_gtt_size / 4)
821 tiling = INTEL_TILING_NONE;
822 }
823
824 return tiling;
825 }
826
827 return INTEL_TILING_NONE;
828 }
829
830 static void
831 init_texture(struct ilo_resource *res)
832 {
833 struct layout_tex_info info;
834
835 switch (res->base.format) {
836 case PIPE_FORMAT_ETC1_RGB8:
837 res->bo_format = PIPE_FORMAT_R8G8B8X8_UNORM;
838 break;
839 default:
840 res->bo_format = res->base.format;
841 break;
842 }
843
844 /* determine tiling first as it may affect the layout */
845 res->tiling = get_tex_tiling(res);
846
847 layout_tex_init(res, &info);
848
849 res->compressed = info.compressed;
850 res->block_width = info.block_width;
851 res->block_height = info.block_height;
852
853 res->halign_8 = (info.align_i == 8);
854 res->valign_4 = (info.align_j == 4);
855 res->array_spacing_full = info.array_spacing_full;
856 res->interleaved = info.interleaved;
857
858 switch (res->base.target) {
859 case PIPE_TEXTURE_1D:
860 case PIPE_TEXTURE_2D:
861 case PIPE_TEXTURE_CUBE:
862 case PIPE_TEXTURE_RECT:
863 case PIPE_TEXTURE_1D_ARRAY:
864 case PIPE_TEXTURE_2D_ARRAY:
865 case PIPE_TEXTURE_CUBE_ARRAY:
866 layout_tex_2d(res, &info);
867 break;
868 case PIPE_TEXTURE_3D:
869 layout_tex_3d(res, &info);
870 break;
871 default:
872 assert(!"unknown resource target");
873 break;
874 }
875
876 /*
877 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
878 *
879 * "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for
880 * W-Major Tiles (W Tiles). Each Block is 8 rows by 8 bytes."
881 *
882 * Since we ask for INTEL_TILING_NONE instead lf INTEL_TILING_W, we need to
883 * manually align the bo width and height to the tile boundaries.
884 */
885 if (res->bo_format == PIPE_FORMAT_S8_UINT) {
886 res->bo_width = align(res->bo_width, 64);
887 res->bo_height = align(res->bo_height, 64);
888 }
889
890 /* in blocks */
891 assert(res->bo_width % info.block_width == 0);
892 assert(res->bo_height % info.block_height == 0);
893 res->bo_width /= info.block_width;
894 res->bo_height /= info.block_height;
895 res->bo_cpp = util_format_get_blocksize(res->bo_format);
896 }
897
898 static void
899 init_buffer(struct ilo_resource *res)
900 {
901 res->bo_format = res->base.format;
902 res->bo_width = res->base.width0;
903 res->bo_height = 1;
904 res->bo_cpp = 1;
905 res->bo_stride = 0;
906 res->tiling = INTEL_TILING_NONE;
907
908 res->compressed = false;
909 res->block_width = 1;
910 res->block_height = 1;
911
912 res->halign_8 = false;
913 res->valign_4 = false;
914 res->array_spacing_full = false;
915 res->interleaved = false;
916 }
917
918 static struct pipe_resource *
919 create_resource(struct pipe_screen *screen,
920 const struct pipe_resource *templ,
921 struct winsys_handle *handle)
922 {
923 struct ilo_resource *res;
924
925 res = CALLOC_STRUCT(ilo_resource);
926 if (!res)
927 return NULL;
928
929 res->base = *templ;
930 res->base.screen = screen;
931 pipe_reference_init(&res->base.reference, 1);
932 res->handle = handle;
933
934 if (!alloc_slice_offsets(res)) {
935 FREE(res);
936 return NULL;
937 }
938
939 if (templ->target == PIPE_BUFFER)
940 init_buffer(res);
941 else
942 init_texture(res);
943
944 if (!ilo_resource_alloc_bo(res)) {
945 free_slice_offsets(res);
946 FREE(res);
947 return NULL;
948 }
949
950 return &res->base;
951 }
952
953 static boolean
954 ilo_can_create_resource(struct pipe_screen *screen,
955 const struct pipe_resource *templ)
956 {
957 /*
958 * We do not know if we will fail until we try to allocate the bo.
959 * So just set a limit on the texture size.
960 */
961 const size_t max_size = 1 * 1024 * 1024 * 1024;
962 const size_t size = guess_tex_size(templ, INTEL_TILING_Y);
963
964 return (size <= max_size);
965 }
966
/**
 * Create a resource backed by a newly allocated bo.
 */
static struct pipe_resource *
ilo_resource_create(struct pipe_screen *screen,
                    const struct pipe_resource *templ)
{
   /* no handle:  the bo is allocated rather than imported */
   struct winsys_handle *const no_handle = NULL;

   return create_resource(screen, templ, no_handle);
}
973
/**
 * Create a resource backed by the bo that \p handle refers to.
 */
static struct pipe_resource *
ilo_resource_from_handle(struct pipe_screen *screen,
                         const struct pipe_resource *templ,
                         struct winsys_handle *handle)
{
   struct pipe_resource *imported = create_resource(screen, templ, handle);

   return imported;
}
981
982 static boolean
983 ilo_resource_get_handle(struct pipe_screen *screen,
984 struct pipe_resource *r,
985 struct winsys_handle *handle)
986 {
987 struct ilo_resource *res = ilo_resource(r);
988 int err;
989
990 err = res->bo->export_handle(res->bo, handle);
991
992 return !err;
993 }
994
995 static void
996 ilo_resource_destroy(struct pipe_screen *screen,
997 struct pipe_resource *r)
998 {
999 struct ilo_resource *res = ilo_resource(r);
1000
1001 free_slice_offsets(res);
1002 res->bo->unreference(res->bo);
1003 FREE(res);
1004 }
1005
1006 /**
1007 * Initialize resource-related functions.
1008 */
1009 void
1010 ilo_init_resource_functions(struct ilo_screen *is)
1011 {
1012 is->base.can_create_resource = ilo_can_create_resource;
1013 is->base.resource_create = ilo_resource_create;
1014 is->base.resource_from_handle = ilo_resource_from_handle;
1015 is->base.resource_get_handle = ilo_resource_get_handle;
1016 is->base.resource_destroy = ilo_resource_destroy;
1017 }
1018
1019 /**
1020 * Return the offset (in bytes) to a slice within the bo.
1021 *
1022 * When tile_aligned is true, the offset is to the tile containing the start
1023 * address of the slice. x_offset and y_offset are offsets (in pixels) from
1024 * the tile start to slice start. x_offset is always a multiple of 4 and
1025 * y_offset is always a multiple of 2.
1026 */
1027 unsigned
1028 ilo_resource_get_slice_offset(const struct ilo_resource *res,
1029 int level, int slice, bool tile_aligned,
1030 unsigned *x_offset, unsigned *y_offset)
1031 {
1032 const unsigned x = res->slice_offsets[level][slice].x / res->block_width;
1033 const unsigned y = res->slice_offsets[level][slice].y / res->block_height;
1034 unsigned tile_w, tile_h, tile_size, row_size;
1035 unsigned slice_offset;
1036
1037 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1038
1039 switch (res->tiling) {
1040 case INTEL_TILING_NONE:
1041 tile_w = res->bo_cpp;
1042 tile_h = 1;
1043 break;
1044 case INTEL_TILING_X:
1045 tile_w = 512;
1046 tile_h = 8;
1047 break;
1048 case INTEL_TILING_Y:
1049 tile_w = 128;
1050 tile_h = 32;
1051 break;
1052 default:
1053 assert(!"unknown tiling");
1054 tile_w = res->bo_cpp;
1055 tile_h = 1;
1056 break;
1057 }
1058
1059 tile_size = tile_w * tile_h;
1060 row_size = res->bo_stride * tile_h;
1061
1062 /*
1063 * for non-tiled resources, this is equivalent to
1064 *
1065 * slice_offset = y * res->bo_stride + x * res->bo_cpp;
1066 */
1067 slice_offset =
1068 row_size * (y / tile_h) + tile_size * (x * res->bo_cpp / tile_w);
1069
1070 /*
1071 * Since res->bo_stride is a multiple of tile_w, slice_offset should be
1072 * aligned at this point.
1073 */
1074 assert(slice_offset % tile_size == 0);
1075
1076 if (tile_aligned) {
1077 /*
1078 * because of the possible values of align_i and align_j in
1079 * layout_tex_init(), x_offset must be a multiple of 4 and y_offset must
1080 * be a multiple of 2.
1081 */
1082 if (x_offset) {
1083 assert(tile_w % res->bo_cpp == 0);
1084 *x_offset = (x % (tile_w / res->bo_cpp)) * res->block_width;
1085 assert(*x_offset % 4 == 0);
1086 }
1087 if (y_offset) {
1088 *y_offset = (y % tile_h) * res->block_height;
1089 assert(*y_offset % 2 == 0);
1090 }
1091 }
1092 else {
1093 const unsigned tx = (x * res->bo_cpp) % tile_w;
1094 const unsigned ty = y % tile_h;
1095
1096 switch (res->tiling) {
1097 case INTEL_TILING_NONE:
1098 assert(tx == 0 && ty == 0);
1099 break;
1100 case INTEL_TILING_X:
1101 slice_offset += tile_w * ty + tx;
1102 break;
1103 case INTEL_TILING_Y:
1104 slice_offset += tile_h * 16 * (tx / 16) + ty * 16 + (tx % 16);
1105 break;
1106 }
1107
1108 if (x_offset)
1109 *x_offset = 0;
1110 if (y_offset)
1111 *y_offset = 0;
1112 }
1113
1114 return slice_offset;
1115 }