ilo: honor surface padding requirements
[mesa.git] / src / gallium / drivers / ilo / ilo_resource.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_screen.h"
29 #include "ilo_resource.h"
30
31 /* use PIPE_BIND_CUSTOM to indicate MCS */
32 #define ILO_BIND_MCS PIPE_BIND_CUSTOM
33
34 struct tex_layout {
35 const struct ilo_dev_info *dev;
36 const struct pipe_resource *templ;
37
38 enum pipe_format format;
39 unsigned block_width, block_height, block_size;
40 bool compressed;
41 bool has_depth, has_stencil, separate_stencil;
42
43 enum intel_tiling_mode tiling;
44 bool can_be_linear;
45
46 bool array_spacing_full;
47 bool interleaved;
48
49 struct {
50 int w, h, d;
51 struct ilo_texture_slice *slices;
52 } levels[PIPE_MAX_TEXTURE_LEVELS];
53
54 int align_i, align_j;
55 int qpitch;
56
57 int width, height;
58 };
59
60 static void
61 tex_layout_init_qpitch(struct tex_layout *layout)
62 {
63 const struct pipe_resource *templ = layout->templ;
64 int h0, h1;
65
66 if (templ->array_size <= 1)
67 return;
68
69 h0 = align(layout->levels[0].h, layout->align_j);
70
71 if (!layout->array_spacing_full) {
72 layout->qpitch = h0;
73 return;
74 }
75
76 h1 = align(layout->levels[1].h, layout->align_j);
77
78 /*
79 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
80 *
81 * "The following equation is used for surface formats other than
82 * compressed textures:
83 *
84 * QPitch = (h0 + h1 + 11j)"
85 *
86 * "The equation for compressed textures (BC* and FXT1 surface formats)
87 * follows:
88 *
89 * QPitch = (h0 + h1 + 11j) / 4"
90 *
91 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
92 * value calculated in the equation above, for every other odd Surface
93 * Height starting from 1 i.e. 1,5,9,13"
94 *
95 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
96 *
97 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
98 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
99 *
100 * QPitch = (h0 + h1 + 12j)
101 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
102 *
103 * (There are many typos or missing words here...)"
104 *
105 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
106 * the base address. The PRM divides QPitch by 4 for compressed formats
107 * because the block height for those formats are 4, and it wants QPitch to
108 * mean the number of memory rows, as opposed to texel rows, between
109 * slices. Since we use texel rows in tex->slice_offsets, we do not need
110 * to divide QPitch by 4.
111 */
112 layout->qpitch = h0 + h1 +
113 ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
114
115 if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
116 templ->height0 % 4 == 1)
117 layout->qpitch += 4;
118 }
119
120 static void
121 tex_layout_init_alignments(struct tex_layout *layout)
122 {
123 const struct pipe_resource *templ = layout->templ;
124
125 /*
126 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
127 *
128 * "surface format align_i align_j
129 * YUV 4:2:2 formats 4 *see below
130 * BC1-5 4 4
131 * FXT1 8 4
132 * all other formats 4 *see below"
133 *
134 * "- align_j = 4 for any depth buffer
135 * - align_j = 2 for separate stencil buffer
136 * - align_j = 4 for any render target surface is multisampled (4x)
137 * - align_j = 4 for any render target surface with Surface Vertical
138 * Alignment = VALIGN_4
139 * - align_j = 2 for any render target surface with Surface Vertical
140 * Alignment = VALIGN_2
141 * - align_j = 2 for all other render target surface
142 * - align_j = 2 for any sampling engine surface with Surface Vertical
143 * Alignment = VALIGN_2
144 * - align_j = 4 for any sampling engine surface with Surface Vertical
145 * Alignment = VALIGN_4"
146 *
147 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
148 *
149 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
150 * the Surface Format is 96 bits per element (BPE)."
151 *
152 * They can be rephrased as
153 *
154 * align_i align_j
155 * compressed formats block width block height
156 * PIPE_FORMAT_S8_UINT 4 2
157 * other depth/stencil formats 4 4
158 * 4x multisampled 4 4
159 * bpp 96 4 2
160 * others 4 2 or 4
161 */
162
163 /*
164 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
165 *
166 * "surface defined by surface format align_i align_j
167 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
168 * not D16_UNORM 4 4
169 * 3DSTATE_STENCIL_BUFFER N/A 8 8
170 * SURFACE_STATE BC*, ETC*, EAC* 4 4
171 * FXT1 8 4
172 * all others (set by SURFACE_STATE)"
173 *
174 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
175 *
176 * "- This field (Surface Vertical Aligment) is intended to be set to
177 * VALIGN_4 if the surface was rendered as a depth buffer, for a
178 * multisampled (4x) render target, or for a multisampled (8x)
179 * render target, since these surfaces support only alignment of 4.
180 * - Use of VALIGN_4 for other surfaces is supported, but uses more
181 * memory.
182 * - This field must be set to VALIGN_4 for all tiled Y Render Target
183 * surfaces.
184 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
185 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
186 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
187 * must be set to VALIGN_4."
188 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
189 *
190 * "- This field (Surface Horizontal Aligment) is intended to be set to
191 * HALIGN_8 only if the surface was rendered as a depth buffer with
192 * Z16 format or a stencil buffer, since these surfaces support only
193 * alignment of 8.
194 * - Use of HALIGN_8 for other surfaces is supported, but uses more
195 * memory.
196 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
197 * - This field must be set to HALIGN_8 if the Surface Format is
198 * FXT1."
199 *
200 * They can be rephrased as
201 *
202 * align_i align_j
203 * compressed formats block width block height
204 * PIPE_FORMAT_Z16_UNORM 8 4
205 * PIPE_FORMAT_S8_UINT 8 8
206 * other depth/stencil formats 4 or 8 4
207 * 2x or 4x multisampled 4 or 8 4
208 * tiled Y 4 or 8 4 (if rt)
209 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
210 * others 4 or 8 2 or 4
211 */
212
213 if (layout->compressed) {
214 /* this happens to be the case */
215 layout->align_i = layout->block_width;
216 layout->align_j = layout->block_height;
217 }
218 else if (layout->has_depth || layout->has_stencil) {
219 if (layout->dev->gen >= ILO_GEN(7)) {
220 switch (layout->format) {
221 case PIPE_FORMAT_Z16_UNORM:
222 layout->align_i = 8;
223 layout->align_j = 4;
224 break;
225 case PIPE_FORMAT_S8_UINT:
226 layout->align_i = 8;
227 layout->align_j = 8;
228 break;
229 default:
230 /*
231 * From the Ivy Bridge PRM, volume 2 part 1, page 319:
232 *
233 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
234 * Depth Coordinate Offset X) must be zero to ensure correct
235 * alignment"
236 *
237 * We will make use of them and setting align_i to 8 help us meet
238 * the requirement.
239 */
240 layout->align_i = (templ->last_level > 0) ? 8 : 4;
241 layout->align_j = 4;
242 break;
243 }
244 }
245 else {
246 switch (layout->format) {
247 case PIPE_FORMAT_S8_UINT:
248 layout->align_i = 4;
249 layout->align_j = 2;
250 break;
251 default:
252 layout->align_i = 4;
253 layout->align_j = 4;
254 break;
255 }
256 }
257 }
258 else {
259 const bool valign_4 = (templ->nr_samples > 1) ||
260 (layout->dev->gen >= ILO_GEN(7) &&
261 layout->tiling == INTEL_TILING_Y &&
262 (templ->bind & PIPE_BIND_RENDER_TARGET));
263
264 if (valign_4)
265 assert(layout->block_size != 12);
266
267 layout->align_i = 4;
268 layout->align_j = (valign_4) ? 4 : 2;
269 }
270
271 /*
272 * the fact that align i and j are multiples of block width and height
273 * respectively is what makes the size of the bo a multiple of the block
274 * size, slices start at block boundaries, and many of the computations
275 * work.
276 */
277 assert(layout->align_i % layout->block_width == 0);
278 assert(layout->align_j % layout->block_height == 0);
279
280 /* make sure align() works */
281 assert(util_is_power_of_two(layout->align_i) &&
282 util_is_power_of_two(layout->align_j));
283 assert(util_is_power_of_two(layout->block_width) &&
284 util_is_power_of_two(layout->block_height));
285 }
286
287 static void
288 tex_layout_init_levels(struct tex_layout *layout)
289 {
290 const struct pipe_resource *templ = layout->templ;
291 int last_level, lv;
292
293 last_level = templ->last_level;
294
295 /* need at least 2 levels to compute full qpitch */
296 if (last_level == 0 && templ->array_size > 1 && layout->array_spacing_full)
297 last_level++;
298
299 /* compute mip level sizes */
300 for (lv = 0; lv <= last_level; lv++) {
301 int w, h, d;
302
303 w = u_minify(templ->width0, lv);
304 h = u_minify(templ->height0, lv);
305 d = u_minify(templ->depth0, lv);
306
307 /*
308 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
309 *
310 * "The dimensions of the mip maps are first determined by applying
311 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
312 * above. Then, if necessary, they are padded out to compression
313 * block boundaries."
314 */
315 w = align(w, layout->block_width);
316 h = align(h, layout->block_height);
317
318 /*
319 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
320 *
321 * "If the surface is multisampled (4x), these values must be
322 * adjusted as follows before proceeding:
323 *
324 * W_L = ceiling(W_L / 2) * 4
325 * H_L = ceiling(H_L / 2) * 4"
326 *
327 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
328 *
329 * "If the surface is multisampled and it is a depth or stencil
330 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
331 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
332 * before proceeding:
333 *
334 * #samples W_L = H_L =
335 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
336 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
337 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
338 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
339 *
340 * For interleaved samples (4x), where pixels
341 *
342 * (x, y ) (x+1, y )
343 * (x, y+1) (x+1, y+1)
344 *
345 * would be is occupied by
346 *
347 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
348 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
349 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
350 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
351 *
352 * Thus the need to
353 *
354 * w = align(w, 2) * 2;
355 * y = align(y, 2) * 2;
356 */
357 if (layout->interleaved) {
358 switch (templ->nr_samples) {
359 case 0:
360 case 1:
361 break;
362 case 2:
363 w = align(w, 2) * 2;
364 break;
365 case 4:
366 w = align(w, 2) * 2;
367 h = align(h, 2) * 2;
368 break;
369 case 8:
370 w = align(w, 2) * 4;
371 h = align(h, 2) * 2;
372 break;
373 case 16:
374 w = align(w, 2) * 4;
375 h = align(h, 2) * 4;
376 break;
377 default:
378 assert(!"unsupported sample count");
379 break;
380 }
381 }
382
383 layout->levels[lv].w = w;
384 layout->levels[lv].h = h;
385 layout->levels[lv].d = d;
386 }
387 }
388
389 static void
390 tex_layout_init_spacing(struct tex_layout *layout)
391 {
392 const struct pipe_resource *templ = layout->templ;
393
394 if (layout->dev->gen >= ILO_GEN(7)) {
395 /*
396 * It is not explicitly states, but render targets are expected to be
397 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
398 * expected to be IMS (samples interleaved).
399 *
400 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
401 */
402 if (layout->has_depth || layout->has_stencil) {
403 layout->interleaved = true;
404
405 /*
406 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
407 *
408 * "note that the depth buffer and stencil buffer have an implied
409 * value of ARYSPC_FULL"
410 */
411 layout->array_spacing_full = true;
412 }
413 else {
414 layout->interleaved = false;
415
416 /*
417 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
418 *
419 * "If Multisampled Surface Storage Format is MSFMT_MSS and
420 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
421 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
422 *
423 * As multisampled resources are not mipmapped, we never use
424 * ARYSPC_FULL for them.
425 */
426 if (templ->nr_samples > 1)
427 assert(templ->last_level == 0);
428 layout->array_spacing_full = (templ->last_level > 0);
429 }
430 }
431 else {
432 /* GEN6 supports only interleaved samples */
433 layout->interleaved = true;
434
435 /*
436 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
437 *
438 * "The separate stencil buffer does not support mip mapping, thus
439 * the storage for LODs other than LOD 0 is not needed. The
440 * following QPitch equation applies only to the separate stencil
441 * buffer:
442 *
443 * QPitch = h_0"
444 *
445 * GEN6 does not support compact spacing otherwise.
446 */
447 layout->array_spacing_full = (layout->format != PIPE_FORMAT_S8_UINT);
448 }
449 }
450
451 static void
452 tex_layout_init_tiling(struct tex_layout *layout)
453 {
454 const struct pipe_resource *templ = layout->templ;
455 const enum pipe_format format = layout->format;
456 const unsigned tile_none = 1 << INTEL_TILING_NONE;
457 const unsigned tile_x = 1 << INTEL_TILING_X;
458 const unsigned tile_y = 1 << INTEL_TILING_Y;
459 unsigned valid_tilings = tile_none | tile_x | tile_y;
460
461 /*
462 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
463 *
464 * "Display/Overlay Y-Major not supported.
465 * X-Major required for Async Flips"
466 */
467 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
468 valid_tilings &= tile_x;
469
470 /*
471 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
472 *
473 * "The cursor surface address must be 4K byte aligned. The cursor must
474 * be in linear memory, it cannot be tiled."
475 */
476 if (unlikely(templ->bind & PIPE_BIND_CURSOR))
477 valid_tilings &= tile_none;
478
479 /*
480 * From the Ivy Bridge PRM, volume 4 part 1, page 76:
481 *
482 * "The MCS surface must be stored as Tile Y."
483 */
484 if (templ->bind & ILO_BIND_MCS)
485 valid_tilings &= tile_y;
486
487 /*
488 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
489 *
490 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
491 * Depth Buffer is not supported."
492 *
493 * "The Depth Buffer, if tiled, must use Y-Major tiling."
494 *
495 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
496 *
497 * "W-Major Tile Format is used for separate stencil."
498 *
499 * Since the HW does not support W-tiled fencing, we have to do it in the
500 * driver.
501 */
502 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
503 switch (format) {
504 case PIPE_FORMAT_S8_UINT:
505 valid_tilings &= tile_none;
506 break;
507 default:
508 valid_tilings &= tile_y;
509 break;
510 }
511 }
512
513 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
514 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
515 /*
516 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
517 *
518 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
519 * either TileX or Linear."
520 */
521 if (layout->block_size == 16)
522 valid_tilings &= ~tile_y;
523
524 /*
525 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
526 *
527 * "This field (Surface Vertical Aligment) must be set to
528 * VALIGN_4 for all tiled Y Render Target surfaces."
529 *
530 * "VALIGN_4 is not supported for surface format
531 * R32G32B32_FLOAT."
532 */
533 if (layout->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
534 valid_tilings &= ~tile_y;
535 }
536
537 /*
538 * Also, heuristically set a minimum width/height for enabling tiling.
539 */
540 if (templ->width0 < 64 && (valid_tilings & ~tile_x))
541 valid_tilings &= ~tile_x;
542
543 if ((templ->width0 < 32 || templ->height0 < 16) &&
544 (templ->width0 < 16 || templ->height0 < 32) &&
545 (valid_tilings & ~tile_y))
546 valid_tilings &= ~tile_y;
547 }
548 else {
549 /* force linear if we are not sure where the texture is bound to */
550 if (valid_tilings & tile_none)
551 valid_tilings &= tile_none;
552 }
553
554 /* no conflicting binding flags */
555 assert(valid_tilings);
556
557 /* prefer tiled than linear */
558 if (valid_tilings & tile_y)
559 layout->tiling = INTEL_TILING_Y;
560 else if (valid_tilings & tile_x)
561 layout->tiling = INTEL_TILING_X;
562 else
563 layout->tiling = INTEL_TILING_NONE;
564
565 layout->can_be_linear = valid_tilings & tile_none;
566 }
567
568 static void
569 tex_layout_init_format(struct tex_layout *layout)
570 {
571 const struct pipe_resource *templ = layout->templ;
572 enum pipe_format format;
573 const struct util_format_description *desc;
574 bool separate_stencil;
575
576 /* GEN7+ requires separate stencil buffers */
577 separate_stencil = (layout->dev->gen >= ILO_GEN(7));
578
579 switch (templ->format) {
580 case PIPE_FORMAT_ETC1_RGB8:
581 format = PIPE_FORMAT_R8G8B8X8_UNORM;
582 break;
583 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
584 if (separate_stencil) {
585 format = PIPE_FORMAT_Z24X8_UNORM;
586 layout->separate_stencil = true;
587 }
588 else {
589 format = templ->format;
590 }
591 break;
592 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
593 if (separate_stencil) {
594 format = PIPE_FORMAT_Z32_FLOAT;
595 layout->separate_stencil = true;
596 }
597 else {
598 format = templ->format;
599 }
600 break;
601 default:
602 format = templ->format;
603 break;
604 }
605
606 layout->format = format;
607
608 layout->block_width = util_format_get_blockwidth(format);
609 layout->block_height = util_format_get_blockheight(format);
610 layout->block_size = util_format_get_blocksize(format);
611 layout->compressed = util_format_is_compressed(format);
612
613 desc = util_format_description(format);
614 layout->has_depth = util_format_has_depth(desc);
615 layout->has_stencil = util_format_has_stencil(desc);
616 }
617
618 static void
619 tex_layout_init(struct tex_layout *layout,
620 struct pipe_screen *screen,
621 const struct pipe_resource *templ,
622 struct ilo_texture_slice **slices)
623 {
624 struct ilo_screen *is = ilo_screen(screen);
625
626 memset(layout, 0, sizeof(*layout));
627
628 layout->dev = &is->dev;
629 layout->templ = templ;
630
631 /* note that there are dependencies between these functions */
632 tex_layout_init_format(layout);
633 tex_layout_init_tiling(layout);
634 tex_layout_init_spacing(layout);
635 tex_layout_init_levels(layout);
636 tex_layout_init_alignments(layout);
637 tex_layout_init_qpitch(layout);
638
639 if (slices) {
640 int lv;
641
642 for (lv = 0; lv <= templ->last_level; lv++)
643 layout->levels[lv].slices = slices[lv];
644 }
645 }
646
647 static bool
648 tex_layout_force_linear(struct tex_layout *layout)
649 {
650 if (!layout->can_be_linear)
651 return false;
652
653 /*
654 * we may be able to switch from VALIGN_4 to VALIGN_2 when the layout was
655 * Y-tiled, but let's keep it simple
656 */
657 layout->tiling = INTEL_TILING_NONE;
658
659 return true;
660 }
661
662 /**
663 * Layout a 2D texture.
664 */
665 static void
666 tex_layout_2d(struct tex_layout *layout)
667 {
668 const struct pipe_resource *templ = layout->templ;
669 unsigned int level_x, level_y, num_slices;
670 int lv;
671
672 level_x = 0;
673 level_y = 0;
674 for (lv = 0; lv <= templ->last_level; lv++) {
675 const unsigned int level_w = layout->levels[lv].w;
676 const unsigned int level_h = layout->levels[lv].h;
677 int slice;
678
679 /* set slice offsets */
680 if (layout->levels[lv].slices) {
681 for (slice = 0; slice < templ->array_size; slice++) {
682 layout->levels[lv].slices[slice].x = level_x;
683 /* slices are qpitch apart in Y-direction */
684 layout->levels[lv].slices[slice].y =
685 level_y + layout->qpitch * slice;
686 }
687 }
688
689 /* extend the size of the monolithic bo to cover this mip level */
690 if (layout->width < level_x + level_w)
691 layout->width = level_x + level_w;
692 if (layout->height < level_y + level_h)
693 layout->height = level_y + level_h;
694
695 /* MIPLAYOUT_BELOW */
696 if (lv == 1)
697 level_x += align(level_w, layout->align_i);
698 else
699 level_y += align(level_h, layout->align_j);
700 }
701
702 num_slices = templ->array_size;
703 /* samples of the same index are stored in a slice */
704 if (templ->nr_samples > 1 && !layout->interleaved)
705 num_slices *= templ->nr_samples;
706
707 /* we did not take slices into consideration in the computation above */
708 layout->height += layout->qpitch * (num_slices - 1);
709 }
710
711 /**
712 * Layout a 3D texture.
713 */
714 static void
715 tex_layout_3d(struct tex_layout *layout)
716 {
717 const struct pipe_resource *templ = layout->templ;
718 unsigned int level_y;
719 int lv;
720
721 level_y = 0;
722 for (lv = 0; lv <= templ->last_level; lv++) {
723 const unsigned int level_w = layout->levels[lv].w;
724 const unsigned int level_h = layout->levels[lv].h;
725 const unsigned int level_d = layout->levels[lv].d;
726 const unsigned int slice_pitch = align(level_w, layout->align_i);
727 const unsigned int slice_qpitch = align(level_h, layout->align_j);
728 const unsigned int num_slices_per_row = 1 << lv;
729 int slice;
730
731 for (slice = 0; slice < level_d; slice += num_slices_per_row) {
732 int i;
733
734 /* set slice offsets */
735 if (layout->levels[lv].slices) {
736 for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
737 layout->levels[lv].slices[slice + i].x = slice_pitch * i;
738 layout->levels[lv].slices[slice + i].y = level_y;
739 }
740 }
741
742 /* move on to the next slice row */
743 level_y += slice_qpitch;
744 }
745
746 /* rightmost slice */
747 slice = MIN2(num_slices_per_row, level_d) - 1;
748
749 /* extend the size of the monolithic bo to cover this slice */
750 if (layout->width < slice_pitch * slice + level_w)
751 layout->width = slice_pitch * slice + level_w;
752 if (lv == templ->last_level)
753 layout->height = (level_y - slice_qpitch) + level_h;
754 }
755 }
756
757 static void
758 tex_layout_validate(struct tex_layout *layout)
759 {
760 /*
761 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
762 *
763 * "To determine the necessary padding on the bottom and right side of
764 * the surface, refer to the table in Section 7.18.3.4 for the i and j
765 * parameters for the surface format in use. The surface must then be
766 * extended to the next multiple of the alignment unit size in each
767 * dimension, and all texels contained in this extended surface must
768 * have valid GTT entries."
769 *
770 * "For cube surfaces, an additional two rows of padding are required
771 * at the bottom of the surface. This must be ensured regardless of
772 * whether the surface is stored tiled or linear. This is due to the
773 * potential rotation of cache line orientation from memory to cache."
774 *
775 * "For compressed textures (BC* and FXT1 surface formats), padding at
776 * the bottom of the surface is to an even compressed row, which is
777 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
778 * purposes, these surfaces behave as if j = 8 only for surface
779 * padding purposes. The value of 4 for j still applies for mip level
780 * alignment and QPitch calculation."
781 */
782 if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) {
783 layout->width = align(layout->width, layout->align_i);
784 layout->height = align(layout->height, layout->align_j);
785
786 if (layout->templ->target == PIPE_TEXTURE_CUBE)
787 layout->height += 2;
788
789 if (layout->compressed)
790 layout->height = align(layout->height, layout->align_j * 2);
791 }
792
793 /*
794 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
795 *
796 * "If the surface contains an odd number of rows of data, a final row
797 * below the surface must be allocated."
798 */
799 if (layout->templ->bind & PIPE_BIND_RENDER_TARGET)
800 layout->height = align(layout->height, 2);
801
802 /*
803 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
804 *
805 * "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for
806 * W-Major Tiles (W Tiles). Each Block is 8 rows by 8 bytes."
807 *
808 * Since we ask for INTEL_TILING_NONE instead of the non-existent
809 * INTEL_TILING_W, we need to manually align the width and height to the
810 * tile boundaries.
811 */
812 if (layout->templ->format == PIPE_FORMAT_S8_UINT) {
813 layout->width = align(layout->width, 64);
814 layout->height = align(layout->height, 64);
815 }
816
817 assert(layout->width % layout->block_width == 0);
818 assert(layout->height % layout->block_height == 0);
819 assert(layout->qpitch % layout->block_height == 0);
820 }
821
822 static size_t
823 tex_layout_estimate_size(const struct tex_layout *layout)
824 {
825 unsigned stride, height;
826
827 stride = (layout->width / layout->block_width) * layout->block_size;
828 height = layout->height / layout->block_height;
829
830 switch (layout->tiling) {
831 case INTEL_TILING_X:
832 stride = align(stride, 512);
833 height = align(height, 8);
834 break;
835 case INTEL_TILING_Y:
836 stride = align(stride, 128);
837 height = align(height, 32);
838 break;
839 default:
840 height = align(height, 2);
841 break;
842 }
843
844 return stride * height;
845 }
846
847 static void
848 tex_layout_apply(const struct tex_layout *layout, struct ilo_texture *tex)
849 {
850 tex->bo_format = layout->format;
851
852 /* in blocks */
853 tex->bo_width = layout->width / layout->block_width;
854 tex->bo_height = layout->height / layout->block_height;
855 tex->bo_cpp = layout->block_size;
856 tex->tiling = layout->tiling;
857
858 tex->compressed = layout->compressed;
859 tex->block_width = layout->block_width;
860 tex->block_height = layout->block_height;
861
862 tex->halign_8 = (layout->align_i == 8);
863 tex->valign_4 = (layout->align_j == 4);
864 tex->array_spacing_full = layout->array_spacing_full;
865 tex->interleaved = layout->interleaved;
866 }
867
868 static void
869 tex_free_slices(struct ilo_texture *tex)
870 {
871 FREE(tex->slice_offsets[0]);
872 }
873
874 static bool
875 tex_alloc_slices(struct ilo_texture *tex)
876 {
877 const struct pipe_resource *templ = &tex->base;
878 struct ilo_texture_slice *slices;
879 int depth, lv;
880
881 /* sum the depths of all levels */
882 depth = 0;
883 for (lv = 0; lv <= templ->last_level; lv++)
884 depth += u_minify(templ->depth0, lv);
885
886 /*
887 * There are (depth * tex->base.array_size) slices in total. Either depth
888 * is one (non-3D) or templ->array_size is one (non-array), but it does
889 * not matter.
890 */
891 slices = CALLOC(depth * templ->array_size, sizeof(*slices));
892 if (!slices)
893 return false;
894
895 tex->slice_offsets[0] = slices;
896
897 /* point to the respective positions in the buffer */
898 for (lv = 1; lv <= templ->last_level; lv++) {
899 tex->slice_offsets[lv] = tex->slice_offsets[lv - 1] +
900 u_minify(templ->depth0, lv - 1) * templ->array_size;
901 }
902
903 return true;
904 }
905
906 static bool
907 tex_create_bo(struct ilo_texture *tex,
908 const struct winsys_handle *handle)
909 {
910 struct ilo_screen *is = ilo_screen(tex->base.screen);
911 const char *name;
912 struct intel_bo *bo;
913 enum intel_tiling_mode tiling;
914 unsigned long pitch;
915
916 switch (tex->base.target) {
917 case PIPE_TEXTURE_1D:
918 name = "1D texture";
919 break;
920 case PIPE_TEXTURE_2D:
921 name = "2D texture";
922 break;
923 case PIPE_TEXTURE_3D:
924 name = "3D texture";
925 break;
926 case PIPE_TEXTURE_CUBE:
927 name = "cube texture";
928 break;
929 case PIPE_TEXTURE_RECT:
930 name = "rectangle texture";
931 break;
932 case PIPE_TEXTURE_1D_ARRAY:
933 name = "1D array texture";
934 break;
935 case PIPE_TEXTURE_2D_ARRAY:
936 name = "2D array texture";
937 break;
938 case PIPE_TEXTURE_CUBE_ARRAY:
939 name = "cube array texture";
940 break;
941 default:
942 name ="unknown texture";
943 break;
944 }
945
946 if (handle) {
947 bo = intel_winsys_import_handle(is->winsys, name, handle,
948 tex->bo_width, tex->bo_height, tex->bo_cpp,
949 &tiling, &pitch);
950 }
951 else {
952 bo = intel_winsys_alloc_texture(is->winsys, name,
953 tex->bo_width, tex->bo_height, tex->bo_cpp,
954 tex->tiling, tex->bo_flags, &pitch);
955
956 tiling = tex->tiling;
957 }
958
959 if (!bo)
960 return false;
961
962 if (tex->bo)
963 intel_bo_unreference(tex->bo);
964
965 tex->bo = bo;
966 tex->tiling = tiling;
967 tex->bo_stride = pitch;
968
969 return true;
970 }
971
972 static void
973 tex_destroy(struct ilo_texture *tex)
974 {
975 if (tex->separate_s8)
976 tex_destroy(tex->separate_s8);
977
978 intel_bo_unreference(tex->bo);
979 tex_free_slices(tex);
980 FREE(tex);
981 }
982
983 static struct pipe_resource *
984 tex_create(struct pipe_screen *screen,
985 const struct pipe_resource *templ,
986 const struct winsys_handle *handle)
987 {
988 struct tex_layout layout;
989 struct ilo_texture *tex;
990
991 tex = CALLOC_STRUCT(ilo_texture);
992 if (!tex)
993 return NULL;
994
995 tex->base = *templ;
996 tex->base.screen = screen;
997 pipe_reference_init(&tex->base.reference, 1);
998
999 if (!tex_alloc_slices(tex)) {
1000 FREE(tex);
1001 return NULL;
1002 }
1003
1004 tex->imported = (handle != NULL);
1005
1006 if (tex->base.bind & (PIPE_BIND_DEPTH_STENCIL |
1007 PIPE_BIND_RENDER_TARGET))
1008 tex->bo_flags |= INTEL_ALLOC_FOR_RENDER;
1009
1010 tex_layout_init(&layout, screen, templ, tex->slice_offsets);
1011
1012 switch (templ->target) {
1013 case PIPE_TEXTURE_1D:
1014 case PIPE_TEXTURE_2D:
1015 case PIPE_TEXTURE_CUBE:
1016 case PIPE_TEXTURE_RECT:
1017 case PIPE_TEXTURE_1D_ARRAY:
1018 case PIPE_TEXTURE_2D_ARRAY:
1019 case PIPE_TEXTURE_CUBE_ARRAY:
1020 tex_layout_2d(&layout);
1021 break;
1022 case PIPE_TEXTURE_3D:
1023 tex_layout_3d(&layout);
1024 break;
1025 default:
1026 assert(!"unknown resource target");
1027 break;
1028 }
1029
1030 tex_layout_validate(&layout);
1031
1032 /* make sure the bo can be mapped through GTT if tiled */
1033 if (layout.tiling != INTEL_TILING_NONE) {
1034 /*
1035 * Usually only the first 256MB of the GTT is mappable.
1036 *
1037 * See also how intel_context::max_gtt_map_object_size is calculated.
1038 */
1039 const size_t mappable_gtt_size = 256 * 1024 * 1024;
1040 const size_t size = tex_layout_estimate_size(&layout);
1041
1042 /* be conservative */
1043 if (size > mappable_gtt_size / 4)
1044 tex_layout_force_linear(&layout);
1045 }
1046
1047 tex_layout_apply(&layout, tex);
1048
1049 if (!tex_create_bo(tex, handle)) {
1050 tex_free_slices(tex);
1051 FREE(tex);
1052 return NULL;
1053 }
1054
1055 /* allocate separate stencil resource */
1056 if (layout.separate_stencil) {
1057 struct pipe_resource s8_templ = *layout.templ;
1058 struct pipe_resource *s8;
1059
1060 /*
1061 * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
1062 * tilings. But that should be fine since it will never be bound as the
1063 * stencil buffer, and our transfer code can handle all tilings.
1064 */
1065 s8_templ.format = PIPE_FORMAT_S8_UINT;
1066
1067 s8 = screen->resource_create(screen, &s8_templ);
1068 if (!s8) {
1069 tex_destroy(tex);
1070 return NULL;
1071 }
1072
1073 tex->separate_s8 = ilo_texture(s8);
1074
1075 assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
1076 }
1077
1078 return &tex->base;
1079 }
1080
1081 static bool
1082 tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
1083 {
1084 struct ilo_screen *is = ilo_screen(tex->base.screen);
1085 int err;
1086
1087 err = intel_winsys_export_handle(is->winsys, tex->bo,
1088 tex->tiling, tex->bo_stride, handle);
1089
1090 return !err;
1091 }
1092
1093 /**
1094 * Estimate the texture size. For large textures, the errors should be pretty
1095 * small.
1096 */
1097 static size_t
1098 tex_estimate_size(struct pipe_screen *screen,
1099 const struct pipe_resource *templ)
1100 {
1101 struct tex_layout layout;
1102
1103 tex_layout_init(&layout, screen, templ, NULL);
1104
1105 switch (templ->target) {
1106 case PIPE_TEXTURE_3D:
1107 tex_layout_3d(&layout);
1108 break;
1109 default:
1110 tex_layout_2d(&layout);
1111 break;
1112 }
1113
1114 tex_layout_validate(&layout);
1115
1116 return tex_layout_estimate_size(&layout);
1117 }
1118
1119 static bool
1120 buf_create_bo(struct ilo_buffer *buf)
1121 {
1122 struct ilo_screen *is = ilo_screen(buf->base.screen);
1123 const char *name;
1124 struct intel_bo *bo;
1125
1126 switch (buf->base.bind) {
1127 case PIPE_BIND_VERTEX_BUFFER:
1128 name = "vertex buffer";
1129 break;
1130 case PIPE_BIND_INDEX_BUFFER:
1131 name = "index buffer";
1132 break;
1133 case PIPE_BIND_CONSTANT_BUFFER:
1134 name = "constant buffer";
1135 break;
1136 case PIPE_BIND_STREAM_OUTPUT:
1137 name = "stream output";
1138 break;
1139 default:
1140 name = "unknown buffer";
1141 break;
1142 }
1143
1144 bo = intel_winsys_alloc_buffer(is->winsys,
1145 name, buf->bo_size, buf->bo_flags);
1146 if (!bo)
1147 return false;
1148
1149 if (buf->bo)
1150 intel_bo_unreference(buf->bo);
1151
1152 buf->bo = bo;
1153
1154 return true;
1155 }
1156
1157 static void
1158 buf_destroy(struct ilo_buffer *buf)
1159 {
1160 intel_bo_unreference(buf->bo);
1161 FREE(buf);
1162 }
1163
1164 static struct pipe_resource *
1165 buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
1166 {
1167 struct ilo_buffer *buf;
1168
1169 buf = CALLOC_STRUCT(ilo_buffer);
1170 if (!buf)
1171 return NULL;
1172
1173 buf->base = *templ;
1174 buf->base.screen = screen;
1175 pipe_reference_init(&buf->base.reference, 1);
1176
1177 buf->bo_size = templ->width0;
1178 buf->bo_flags = 0;
1179
1180 /*
1181 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
1182 *
1183 * "For buffers, which have no inherent "height," padding requirements
1184 * are different. A buffer must be padded to the next multiple of 256
1185 * array elements, with an additional 16 bytes added beyond that to
1186 * account for the L1 cache line."
1187 */
1188 if (templ->bind & PIPE_BIND_SAMPLER_VIEW)
1189 buf->bo_size = align(buf->bo_size, 256) + 16;
1190
1191 if (!buf_create_bo(buf)) {
1192 FREE(buf);
1193 return NULL;
1194 }
1195
1196 return &buf->base;
1197 }
1198
1199 static boolean
1200 ilo_can_create_resource(struct pipe_screen *screen,
1201 const struct pipe_resource *templ)
1202 {
1203 /*
1204 * We do not know if we will fail until we try to allocate the bo.
1205 * So just set a limit on the texture size.
1206 */
1207 const size_t max_size = 1 * 1024 * 1024 * 1024;
1208 size_t size;
1209
1210 if (templ->target == PIPE_BUFFER)
1211 size = templ->width0;
1212 else
1213 size = tex_estimate_size(screen, templ);
1214
1215 return (size <= max_size);
1216 }
1217
1218 static struct pipe_resource *
1219 ilo_resource_create(struct pipe_screen *screen,
1220 const struct pipe_resource *templ)
1221 {
1222 if (templ->target == PIPE_BUFFER)
1223 return buf_create(screen, templ);
1224 else
1225 return tex_create(screen, templ, NULL);
1226 }
1227
1228 static struct pipe_resource *
1229 ilo_resource_from_handle(struct pipe_screen *screen,
1230 const struct pipe_resource *templ,
1231 struct winsys_handle *handle)
1232 {
1233 if (templ->target == PIPE_BUFFER)
1234 return NULL;
1235 else
1236 return tex_create(screen, templ, handle);
1237 }
1238
1239 static boolean
1240 ilo_resource_get_handle(struct pipe_screen *screen,
1241 struct pipe_resource *res,
1242 struct winsys_handle *handle)
1243 {
1244 if (res->target == PIPE_BUFFER)
1245 return false;
1246 else
1247 return tex_get_handle(ilo_texture(res), handle);
1248
1249 }
1250
1251 static void
1252 ilo_resource_destroy(struct pipe_screen *screen,
1253 struct pipe_resource *res)
1254 {
1255 if (res->target == PIPE_BUFFER)
1256 buf_destroy(ilo_buffer(res));
1257 else
1258 tex_destroy(ilo_texture(res));
1259 }
1260
1261 /**
1262 * Initialize resource-related functions.
1263 */
1264 void
1265 ilo_init_resource_functions(struct ilo_screen *is)
1266 {
1267 is->base.can_create_resource = ilo_can_create_resource;
1268 is->base.resource_create = ilo_resource_create;
1269 is->base.resource_from_handle = ilo_resource_from_handle;
1270 is->base.resource_get_handle = ilo_resource_get_handle;
1271 is->base.resource_destroy = ilo_resource_destroy;
1272 }
1273
/**
 * Allocate a new bo for the buffer.  This is the public entry point to
 * buf_create_bo(); its result is returned unchanged.
 */
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf)
{
   const bool allocated = buf_create_bo(buf);

   return allocated;
}
1279
1280 bool
1281 ilo_texture_alloc_bo(struct ilo_texture *tex)
1282 {
1283 /* a shared bo cannot be reallocated */
1284 if (tex->imported)
1285 return false;
1286
1287 return tex_create_bo(tex, NULL);
1288 }
1289
1290 /**
1291 * Return the offset (in bytes) to a slice within the bo.
1292 *
1293 * The returned offset is aligned to tile size. Since slices are not
1294 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1295 * from the tile origin to the slice are also returned. X offset is always a
1296 * multiple of 4 and Y offset is always a multiple of 2.
1297 */
1298 unsigned
1299 ilo_texture_get_slice_offset(const struct ilo_texture *tex,
1300 int level, int slice,
1301 unsigned *x_offset, unsigned *y_offset)
1302 {
1303 unsigned tile_w, tile_h, tile_size, row_size;
1304 unsigned x, y, slice_offset;
1305
1306 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1307
1308 switch (tex->tiling) {
1309 case INTEL_TILING_NONE:
1310 /* W-tiled */
1311 if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
1312 tile_w = 64;
1313 tile_h = 64;
1314 }
1315 else {
1316 tile_w = 1;
1317 tile_h = 1;
1318 }
1319 break;
1320 case INTEL_TILING_X:
1321 tile_w = 512;
1322 tile_h = 8;
1323 break;
1324 case INTEL_TILING_Y:
1325 tile_w = 128;
1326 tile_h = 32;
1327 break;
1328 default:
1329 assert(!"unknown tiling");
1330 tile_w = 1;
1331 tile_h = 1;
1332 break;
1333 }
1334
1335 tile_size = tile_w * tile_h;
1336 row_size = tex->bo_stride * tile_h;
1337
1338 /* in bytes */
1339 x = tex->slice_offsets[level][slice].x / tex->block_width * tex->bo_cpp;
1340 y = tex->slice_offsets[level][slice].y / tex->block_height;
1341 slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1342
1343 /*
1344 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1345 * aligned at this point.
1346 */
1347 assert(slice_offset % tile_size == 0);
1348
1349 /*
1350 * because of the possible values of align_i and align_j in
1351 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1352 * 4 and y_offset is guaranteed to be a multiple of 2.
1353 */
1354 if (x_offset) {
1355 /* in pixels */
1356 x = (x % tile_w) / tex->bo_cpp * tex->block_width;
1357 assert(x % 4 == 0);
1358
1359 *x_offset = x;
1360 }
1361
1362 if (y_offset) {
1363 /* in pixels */
1364 y = (y % tile_h) * tex->block_height;
1365 assert(y % 2 == 0);
1366
1367 *y_offset = y;
1368 }
1369
1370 return slice_offset;
1371 }