ilo: add BLT-based blitting methods to ilo_blitter
[mesa.git] / src / gallium / drivers / ilo / ilo_resource.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_screen.h"
29 #include "ilo_resource.h"
30
/* MCS resources are tagged with PIPE_BIND_CUSTOM in their bind flags */
#define ILO_BIND_MCS PIPE_BIND_CUSTOM
33
34 struct tex_layout {
35 const struct ilo_dev_info *dev;
36 const struct pipe_resource *templ;
37
38 enum pipe_format format;
39 unsigned block_width, block_height, block_size;
40 bool compressed;
41 bool has_depth, has_stencil, separate_stencil;
42
43 enum intel_tiling_mode tiling;
44 bool can_be_linear;
45
46 bool array_spacing_full;
47 bool interleaved;
48
49 struct {
50 int w, h, d;
51 struct ilo_texture_slice *slices;
52 } levels[PIPE_MAX_TEXTURE_LEVELS];
53
54 int align_i, align_j;
55 int qpitch;
56
57 int width, height;
58 };
59
60 static void
61 tex_layout_init_qpitch(struct tex_layout *layout)
62 {
63 const struct pipe_resource *templ = layout->templ;
64 int h0, h1;
65
66 if (templ->array_size <= 1)
67 return;
68
69 h0 = align(layout->levels[0].h, layout->align_j);
70
71 if (!layout->array_spacing_full) {
72 layout->qpitch = h0;
73 return;
74 }
75
76 h1 = align(layout->levels[1].h, layout->align_j);
77
78 /*
79 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
80 *
81 * "The following equation is used for surface formats other than
82 * compressed textures:
83 *
84 * QPitch = (h0 + h1 + 11j)"
85 *
86 * "The equation for compressed textures (BC* and FXT1 surface formats)
87 * follows:
88 *
89 * QPitch = (h0 + h1 + 11j) / 4"
90 *
91 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
92 * value calculated in the equation above, for every other odd Surface
93 * Height starting from 1 i.e. 1,5,9,13"
94 *
95 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
96 *
97 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
98 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
99 *
100 * QPitch = (h0 + h1 + 12j)
101 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
102 *
103 * (There are many typos or missing words here...)"
104 *
105 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
106 * the base address. The PRM divides QPitch by 4 for compressed formats
107 * because the block height for those formats are 4, and it wants QPitch to
108 * mean the number of memory rows, as opposed to texel rows, between
109 * slices. Since we use texel rows in tex->slice_offsets, we do not need
110 * to divide QPitch by 4.
111 */
112 layout->qpitch = h0 + h1 +
113 ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
114
115 if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
116 templ->height0 % 4 == 1)
117 layout->qpitch += 4;
118 }
119
120 static void
121 tex_layout_init_alignments(struct tex_layout *layout)
122 {
123 const struct pipe_resource *templ = layout->templ;
124
125 /*
126 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
127 *
128 * "surface format align_i align_j
129 * YUV 4:2:2 formats 4 *see below
130 * BC1-5 4 4
131 * FXT1 8 4
132 * all other formats 4 *see below"
133 *
134 * "- align_j = 4 for any depth buffer
135 * - align_j = 2 for separate stencil buffer
136 * - align_j = 4 for any render target surface is multisampled (4x)
137 * - align_j = 4 for any render target surface with Surface Vertical
138 * Alignment = VALIGN_4
139 * - align_j = 2 for any render target surface with Surface Vertical
140 * Alignment = VALIGN_2
141 * - align_j = 2 for all other render target surface
142 * - align_j = 2 for any sampling engine surface with Surface Vertical
143 * Alignment = VALIGN_2
144 * - align_j = 4 for any sampling engine surface with Surface Vertical
145 * Alignment = VALIGN_4"
146 *
147 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
148 *
149 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
150 * the Surface Format is 96 bits per element (BPE)."
151 *
152 * They can be rephrased as
153 *
154 * align_i align_j
155 * compressed formats block width block height
156 * PIPE_FORMAT_S8_UINT 4 2
157 * other depth/stencil formats 4 4
158 * 4x multisampled 4 4
159 * bpp 96 4 2
160 * others 4 2 or 4
161 */
162
163 /*
164 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
165 *
166 * "surface defined by surface format align_i align_j
167 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
168 * not D16_UNORM 4 4
169 * 3DSTATE_STENCIL_BUFFER N/A 8 8
170 * SURFACE_STATE BC*, ETC*, EAC* 4 4
171 * FXT1 8 4
172 * all others (set by SURFACE_STATE)"
173 *
174 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
175 *
176 * "- This field (Surface Vertical Aligment) is intended to be set to
177 * VALIGN_4 if the surface was rendered as a depth buffer, for a
178 * multisampled (4x) render target, or for a multisampled (8x)
179 * render target, since these surfaces support only alignment of 4.
180 * - Use of VALIGN_4 for other surfaces is supported, but uses more
181 * memory.
182 * - This field must be set to VALIGN_4 for all tiled Y Render Target
183 * surfaces.
184 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
185 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
186 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
187 * must be set to VALIGN_4."
188 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
189 *
190 * "- This field (Surface Horizontal Aligment) is intended to be set to
191 * HALIGN_8 only if the surface was rendered as a depth buffer with
192 * Z16 format or a stencil buffer, since these surfaces support only
193 * alignment of 8.
194 * - Use of HALIGN_8 for other surfaces is supported, but uses more
195 * memory.
196 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
197 * - This field must be set to HALIGN_8 if the Surface Format is
198 * FXT1."
199 *
200 * They can be rephrased as
201 *
202 * align_i align_j
203 * compressed formats block width block height
204 * PIPE_FORMAT_Z16_UNORM 8 4
205 * PIPE_FORMAT_S8_UINT 8 8
206 * other depth/stencil formats 4 or 8 4
207 * 2x or 4x multisampled 4 or 8 4
208 * tiled Y 4 or 8 4 (if rt)
209 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
210 * others 4 or 8 2 or 4
211 */
212
213 if (layout->compressed) {
214 /* this happens to be the case */
215 layout->align_i = layout->block_width;
216 layout->align_j = layout->block_height;
217 }
218 else if (layout->has_depth || layout->has_stencil) {
219 if (layout->dev->gen >= ILO_GEN(7)) {
220 switch (layout->format) {
221 case PIPE_FORMAT_Z16_UNORM:
222 layout->align_i = 8;
223 layout->align_j = 4;
224 break;
225 case PIPE_FORMAT_S8_UINT:
226 layout->align_i = 8;
227 layout->align_j = 8;
228 break;
229 default:
230 /*
231 * From the Ivy Bridge PRM, volume 2 part 1, page 319:
232 *
233 * "The 3 LSBs of both offsets (Depth Coordinate Offset Y and
234 * Depth Coordinate Offset X) must be zero to ensure correct
235 * alignment"
236 *
237 * We will make use of them and setting align_i to 8 help us meet
238 * the requirement.
239 */
240 layout->align_i = (templ->last_level > 0) ? 8 : 4;
241 layout->align_j = 4;
242 break;
243 }
244 }
245 else {
246 switch (layout->format) {
247 case PIPE_FORMAT_S8_UINT:
248 layout->align_i = 4;
249 layout->align_j = 2;
250 break;
251 default:
252 layout->align_i = 4;
253 layout->align_j = 4;
254 break;
255 }
256 }
257 }
258 else {
259 const bool valign_4 = (templ->nr_samples > 1) ||
260 (layout->dev->gen >= ILO_GEN(7) &&
261 layout->tiling == INTEL_TILING_Y &&
262 (templ->bind & PIPE_BIND_RENDER_TARGET));
263
264 if (valign_4)
265 assert(layout->block_size != 12);
266
267 layout->align_i = 4;
268 layout->align_j = (valign_4) ? 4 : 2;
269 }
270
271 /*
272 * the fact that align i and j are multiples of block width and height
273 * respectively is what makes the size of the bo a multiple of the block
274 * size, slices start at block boundaries, and many of the computations
275 * work.
276 */
277 assert(layout->align_i % layout->block_width == 0);
278 assert(layout->align_j % layout->block_height == 0);
279
280 /* make sure align() works */
281 assert(util_is_power_of_two(layout->align_i) &&
282 util_is_power_of_two(layout->align_j));
283 assert(util_is_power_of_two(layout->block_width) &&
284 util_is_power_of_two(layout->block_height));
285 }
286
287 static void
288 tex_layout_init_levels(struct tex_layout *layout)
289 {
290 const struct pipe_resource *templ = layout->templ;
291 int last_level, lv;
292
293 last_level = templ->last_level;
294
295 /* need at least 2 levels to compute full qpitch */
296 if (last_level == 0 && templ->array_size > 1 && layout->array_spacing_full)
297 last_level++;
298
299 /* compute mip level sizes */
300 for (lv = 0; lv <= last_level; lv++) {
301 int w, h, d;
302
303 w = u_minify(templ->width0, lv);
304 h = u_minify(templ->height0, lv);
305 d = u_minify(templ->depth0, lv);
306
307 /*
308 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
309 *
310 * "The dimensions of the mip maps are first determined by applying
311 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
312 * above. Then, if necessary, they are padded out to compression
313 * block boundaries."
314 */
315 w = align(w, layout->block_width);
316 h = align(h, layout->block_height);
317
318 /*
319 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
320 *
321 * "If the surface is multisampled (4x), these values must be
322 * adjusted as follows before proceeding:
323 *
324 * W_L = ceiling(W_L / 2) * 4
325 * H_L = ceiling(H_L / 2) * 4"
326 *
327 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
328 *
329 * "If the surface is multisampled and it is a depth or stencil
330 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
331 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
332 * before proceeding:
333 *
334 * #samples W_L = H_L =
335 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
336 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
337 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
338 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
339 *
340 * For interleaved samples (4x), where pixels
341 *
342 * (x, y ) (x+1, y )
343 * (x, y+1) (x+1, y+1)
344 *
345 * would be is occupied by
346 *
347 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
348 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
349 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
350 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
351 *
352 * Thus the need to
353 *
354 * w = align(w, 2) * 2;
355 * y = align(y, 2) * 2;
356 */
357 if (layout->interleaved) {
358 switch (templ->nr_samples) {
359 case 0:
360 case 1:
361 break;
362 case 2:
363 w = align(w, 2) * 2;
364 break;
365 case 4:
366 w = align(w, 2) * 2;
367 h = align(h, 2) * 2;
368 break;
369 case 8:
370 w = align(w, 2) * 4;
371 h = align(h, 2) * 2;
372 break;
373 case 16:
374 w = align(w, 2) * 4;
375 h = align(h, 2) * 4;
376 break;
377 default:
378 assert(!"unsupported sample count");
379 break;
380 }
381 }
382
383 layout->levels[lv].w = w;
384 layout->levels[lv].h = h;
385 layout->levels[lv].d = d;
386 }
387 }
388
389 static void
390 tex_layout_init_spacing(struct tex_layout *layout)
391 {
392 const struct pipe_resource *templ = layout->templ;
393
394 if (layout->dev->gen >= ILO_GEN(7)) {
395 /*
396 * It is not explicitly states, but render targets are expected to be
397 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
398 * expected to be IMS (samples interleaved).
399 *
400 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
401 */
402 if (layout->has_depth || layout->has_stencil) {
403 layout->interleaved = true;
404
405 /*
406 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
407 *
408 * "note that the depth buffer and stencil buffer have an implied
409 * value of ARYSPC_FULL"
410 */
411 layout->array_spacing_full = true;
412 }
413 else {
414 layout->interleaved = false;
415
416 /*
417 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
418 *
419 * "If Multisampled Surface Storage Format is MSFMT_MSS and
420 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
421 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
422 *
423 * As multisampled resources are not mipmapped, we never use
424 * ARYSPC_FULL for them.
425 */
426 if (templ->nr_samples > 1)
427 assert(templ->last_level == 0);
428 layout->array_spacing_full = (templ->last_level > 0);
429 }
430 }
431 else {
432 /* GEN6 supports only interleaved samples */
433 layout->interleaved = true;
434
435 /*
436 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
437 *
438 * "The separate stencil buffer does not support mip mapping, thus
439 * the storage for LODs other than LOD 0 is not needed. The
440 * following QPitch equation applies only to the separate stencil
441 * buffer:
442 *
443 * QPitch = h_0"
444 *
445 * GEN6 does not support compact spacing otherwise.
446 */
447 layout->array_spacing_full = (layout->format != PIPE_FORMAT_S8_UINT);
448 }
449 }
450
451 static void
452 tex_layout_init_tiling(struct tex_layout *layout)
453 {
454 const struct pipe_resource *templ = layout->templ;
455 const enum pipe_format format = layout->format;
456 const unsigned tile_none = 1 << INTEL_TILING_NONE;
457 const unsigned tile_x = 1 << INTEL_TILING_X;
458 const unsigned tile_y = 1 << INTEL_TILING_Y;
459 unsigned valid_tilings = tile_none | tile_x | tile_y;
460
461 /*
462 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
463 *
464 * "Display/Overlay Y-Major not supported.
465 * X-Major required for Async Flips"
466 */
467 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
468 valid_tilings &= tile_x;
469
470 /*
471 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
472 *
473 * "The cursor surface address must be 4K byte aligned. The cursor must
474 * be in linear memory, it cannot be tiled."
475 */
476 if (unlikely(templ->bind & PIPE_BIND_CURSOR))
477 valid_tilings &= tile_none;
478
479 /*
480 * From the Ivy Bridge PRM, volume 4 part 1, page 76:
481 *
482 * "The MCS surface must be stored as Tile Y."
483 */
484 if (templ->bind & ILO_BIND_MCS)
485 valid_tilings &= tile_y;
486
487 /*
488 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
489 *
490 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
491 * Depth Buffer is not supported."
492 *
493 * "The Depth Buffer, if tiled, must use Y-Major tiling."
494 *
495 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
496 *
497 * "W-Major Tile Format is used for separate stencil."
498 *
499 * Since the HW does not support W-tiled fencing, we have to do it in the
500 * driver.
501 */
502 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
503 switch (format) {
504 case PIPE_FORMAT_S8_UINT:
505 valid_tilings &= tile_none;
506 break;
507 default:
508 valid_tilings &= tile_y;
509 break;
510 }
511 }
512
513 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
514 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
515 /*
516 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
517 *
518 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
519 * either TileX or Linear."
520 */
521 if (layout->block_size == 16)
522 valid_tilings &= ~tile_y;
523
524 /*
525 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
526 *
527 * "This field (Surface Vertical Aligment) must be set to
528 * VALIGN_4 for all tiled Y Render Target surfaces."
529 *
530 * "VALIGN_4 is not supported for surface format
531 * R32G32B32_FLOAT."
532 */
533 if (layout->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
534 valid_tilings &= ~tile_y;
535 }
536
537 /*
538 * Also, heuristically set a minimum width/height for enabling tiling.
539 */
540 if (templ->width0 < 64 && (valid_tilings & ~tile_x))
541 valid_tilings &= ~tile_x;
542
543 if ((templ->width0 < 32 || templ->height0 < 16) &&
544 (templ->width0 < 16 || templ->height0 < 32) &&
545 (valid_tilings & ~tile_y))
546 valid_tilings &= ~tile_y;
547 }
548 else {
549 /* force linear if we are not sure where the texture is bound to */
550 if (valid_tilings & tile_none)
551 valid_tilings &= tile_none;
552 }
553
554 /* no conflicting binding flags */
555 assert(valid_tilings);
556
557 /* prefer tiled than linear */
558 if (valid_tilings & tile_y)
559 layout->tiling = INTEL_TILING_Y;
560 else if (valid_tilings & tile_x)
561 layout->tiling = INTEL_TILING_X;
562 else
563 layout->tiling = INTEL_TILING_NONE;
564
565 layout->can_be_linear = valid_tilings & tile_none;
566 }
567
568 static void
569 tex_layout_init_format(struct tex_layout *layout)
570 {
571 const struct pipe_resource *templ = layout->templ;
572 enum pipe_format format;
573 const struct util_format_description *desc;
574 bool separate_stencil;
575
576 /* GEN7+ requires separate stencil buffers */
577 separate_stencil = (layout->dev->gen >= ILO_GEN(7));
578
579 switch (templ->format) {
580 case PIPE_FORMAT_ETC1_RGB8:
581 format = PIPE_FORMAT_R8G8B8X8_UNORM;
582 break;
583 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
584 if (separate_stencil) {
585 format = PIPE_FORMAT_Z24X8_UNORM;
586 layout->separate_stencil = true;
587 }
588 else {
589 format = templ->format;
590 }
591 break;
592 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
593 if (separate_stencil) {
594 format = PIPE_FORMAT_Z32_FLOAT;
595 layout->separate_stencil = true;
596 }
597 else {
598 format = templ->format;
599 }
600 break;
601 default:
602 format = templ->format;
603 break;
604 }
605
606 layout->format = format;
607
608 layout->block_width = util_format_get_blockwidth(format);
609 layout->block_height = util_format_get_blockheight(format);
610 layout->block_size = util_format_get_blocksize(format);
611 layout->compressed = util_format_is_compressed(format);
612
613 desc = util_format_description(format);
614 layout->has_depth = util_format_has_depth(desc);
615 layout->has_stencil = util_format_has_stencil(desc);
616 }
617
618 static void
619 tex_layout_init(struct tex_layout *layout,
620 struct pipe_screen *screen,
621 const struct pipe_resource *templ,
622 struct ilo_texture_slice **slices)
623 {
624 struct ilo_screen *is = ilo_screen(screen);
625
626 memset(layout, 0, sizeof(*layout));
627
628 layout->dev = &is->dev;
629 layout->templ = templ;
630
631 /* note that there are dependencies between these functions */
632 tex_layout_init_format(layout);
633 tex_layout_init_tiling(layout);
634 tex_layout_init_spacing(layout);
635 tex_layout_init_levels(layout);
636 tex_layout_init_alignments(layout);
637 tex_layout_init_qpitch(layout);
638
639 if (slices) {
640 int lv;
641
642 for (lv = 0; lv <= templ->last_level; lv++)
643 layout->levels[lv].slices = slices[lv];
644 }
645 }
646
647 static bool
648 tex_layout_force_linear(struct tex_layout *layout)
649 {
650 if (!layout->can_be_linear)
651 return false;
652
653 /*
654 * we may be able to switch from VALIGN_4 to VALIGN_2 when the layout was
655 * Y-tiled, but let's keep it simple
656 */
657 layout->tiling = INTEL_TILING_NONE;
658
659 return true;
660 }
661
662 /**
663 * Layout a 2D texture.
664 */
665 static void
666 tex_layout_2d(struct tex_layout *layout)
667 {
668 const struct pipe_resource *templ = layout->templ;
669 unsigned int level_x, level_y, num_slices;
670 int lv;
671
672 level_x = 0;
673 level_y = 0;
674 for (lv = 0; lv <= templ->last_level; lv++) {
675 const unsigned int level_w = layout->levels[lv].w;
676 const unsigned int level_h = layout->levels[lv].h;
677 int slice;
678
679 /* set slice offsets */
680 if (layout->levels[lv].slices) {
681 for (slice = 0; slice < templ->array_size; slice++) {
682 layout->levels[lv].slices[slice].x = level_x;
683 /* slices are qpitch apart in Y-direction */
684 layout->levels[lv].slices[slice].y =
685 level_y + layout->qpitch * slice;
686 }
687 }
688
689 /* extend the size of the monolithic bo to cover this mip level */
690 if (layout->width < level_x + level_w)
691 layout->width = level_x + level_w;
692 if (layout->height < level_y + level_h)
693 layout->height = level_y + level_h;
694
695 /* MIPLAYOUT_BELOW */
696 if (lv == 1)
697 level_x += align(level_w, layout->align_i);
698 else
699 level_y += align(level_h, layout->align_j);
700 }
701
702 num_slices = templ->array_size;
703 /* samples of the same index are stored in a slice */
704 if (templ->nr_samples > 1 && !layout->interleaved)
705 num_slices *= templ->nr_samples;
706
707 /* we did not take slices into consideration in the computation above */
708 layout->height += layout->qpitch * (num_slices - 1);
709 }
710
711 /**
712 * Layout a 3D texture.
713 */
714 static void
715 tex_layout_3d(struct tex_layout *layout)
716 {
717 const struct pipe_resource *templ = layout->templ;
718 unsigned int level_y;
719 int lv;
720
721 level_y = 0;
722 for (lv = 0; lv <= templ->last_level; lv++) {
723 const unsigned int level_w = layout->levels[lv].w;
724 const unsigned int level_h = layout->levels[lv].h;
725 const unsigned int level_d = layout->levels[lv].d;
726 const unsigned int slice_pitch = align(level_w, layout->align_i);
727 const unsigned int slice_qpitch = align(level_h, layout->align_j);
728 const unsigned int num_slices_per_row = 1 << lv;
729 int slice;
730
731 for (slice = 0; slice < level_d; slice += num_slices_per_row) {
732 int i;
733
734 /* set slice offsets */
735 if (layout->levels[lv].slices) {
736 for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
737 layout->levels[lv].slices[slice + i].x = slice_pitch * i;
738 layout->levels[lv].slices[slice + i].y = level_y;
739 }
740 }
741
742 /* move on to the next slice row */
743 level_y += slice_qpitch;
744 }
745
746 /* rightmost slice */
747 slice = MIN2(num_slices_per_row, level_d) - 1;
748
749 /* extend the size of the monolithic bo to cover this slice */
750 if (layout->width < slice_pitch * slice + level_w)
751 layout->width = slice_pitch * slice + level_w;
752 if (lv == templ->last_level)
753 layout->height = (level_y - slice_qpitch) + level_h;
754 }
755 }
756
757 static void
758 tex_layout_validate(struct tex_layout *layout)
759 {
760 /*
761 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
762 *
763 * "A 4KB tile is subdivided into 8-high by 8-wide array of Blocks for
764 * W-Major Tiles (W Tiles). Each Block is 8 rows by 8 bytes."
765 *
766 * Since we ask for INTEL_TILING_NONE instead of the non-existent
767 * INTEL_TILING_W, we need to manually align the width and height to the
768 * tile boundaries.
769 */
770 if (layout->templ->format == PIPE_FORMAT_S8_UINT) {
771 layout->width = align(layout->width, 64);
772 layout->height = align(layout->height, 64);
773 }
774
775 assert(layout->width % layout->block_width == 0);
776 assert(layout->height % layout->block_height == 0);
777 assert(layout->qpitch % layout->block_height == 0);
778 }
779
780 static size_t
781 tex_layout_estimate_size(const struct tex_layout *layout)
782 {
783 unsigned stride, height;
784
785 stride = (layout->width / layout->block_width) * layout->block_size;
786 height = layout->height / layout->block_height;
787
788 switch (layout->tiling) {
789 case INTEL_TILING_X:
790 stride = align(stride, 512);
791 height = align(height, 8);
792 break;
793 case INTEL_TILING_Y:
794 stride = align(stride, 128);
795 height = align(height, 32);
796 break;
797 default:
798 height = align(height, 2);
799 break;
800 }
801
802 return stride * height;
803 }
804
805 static void
806 tex_layout_apply(const struct tex_layout *layout, struct ilo_texture *tex)
807 {
808 tex->bo_format = layout->format;
809
810 /* in blocks */
811 tex->bo_width = layout->width / layout->block_width;
812 tex->bo_height = layout->height / layout->block_height;
813 tex->bo_cpp = layout->block_size;
814 tex->tiling = layout->tiling;
815
816 tex->compressed = layout->compressed;
817 tex->block_width = layout->block_width;
818 tex->block_height = layout->block_height;
819
820 tex->halign_8 = (layout->align_i == 8);
821 tex->valign_4 = (layout->align_j == 4);
822 tex->array_spacing_full = layout->array_spacing_full;
823 tex->interleaved = layout->interleaved;
824 }
825
826 static void
827 tex_free_slices(struct ilo_texture *tex)
828 {
829 FREE(tex->slice_offsets[0]);
830 }
831
832 static bool
833 tex_alloc_slices(struct ilo_texture *tex)
834 {
835 const struct pipe_resource *templ = &tex->base;
836 struct ilo_texture_slice *slices;
837 int depth, lv;
838
839 /* sum the depths of all levels */
840 depth = 0;
841 for (lv = 0; lv <= templ->last_level; lv++)
842 depth += u_minify(templ->depth0, lv);
843
844 /*
845 * There are (depth * tex->base.array_size) slices in total. Either depth
846 * is one (non-3D) or templ->array_size is one (non-array), but it does
847 * not matter.
848 */
849 slices = CALLOC(depth * templ->array_size, sizeof(*slices));
850 if (!slices)
851 return false;
852
853 tex->slice_offsets[0] = slices;
854
855 /* point to the respective positions in the buffer */
856 for (lv = 1; lv <= templ->last_level; lv++) {
857 tex->slice_offsets[lv] = tex->slice_offsets[lv - 1] +
858 u_minify(templ->depth0, lv - 1) * templ->array_size;
859 }
860
861 return true;
862 }
863
864 static bool
865 tex_create_bo(struct ilo_texture *tex,
866 const struct winsys_handle *handle)
867 {
868 struct ilo_screen *is = ilo_screen(tex->base.screen);
869 const char *name;
870 struct intel_bo *bo;
871 enum intel_tiling_mode tiling;
872 unsigned long pitch;
873
874 switch (tex->base.target) {
875 case PIPE_TEXTURE_1D:
876 name = "1D texture";
877 break;
878 case PIPE_TEXTURE_2D:
879 name = "2D texture";
880 break;
881 case PIPE_TEXTURE_3D:
882 name = "3D texture";
883 break;
884 case PIPE_TEXTURE_CUBE:
885 name = "cube texture";
886 break;
887 case PIPE_TEXTURE_RECT:
888 name = "rectangle texture";
889 break;
890 case PIPE_TEXTURE_1D_ARRAY:
891 name = "1D array texture";
892 break;
893 case PIPE_TEXTURE_2D_ARRAY:
894 name = "2D array texture";
895 break;
896 case PIPE_TEXTURE_CUBE_ARRAY:
897 name = "cube array texture";
898 break;
899 default:
900 name ="unknown texture";
901 break;
902 }
903
904 if (handle) {
905 bo = intel_winsys_import_handle(is->winsys, name, handle,
906 tex->bo_width, tex->bo_height, tex->bo_cpp,
907 &tiling, &pitch);
908 }
909 else {
910 bo = intel_winsys_alloc_texture(is->winsys, name,
911 tex->bo_width, tex->bo_height, tex->bo_cpp,
912 tex->tiling, tex->bo_flags, &pitch);
913
914 tiling = tex->tiling;
915 }
916
917 if (!bo)
918 return false;
919
920 if (tex->bo)
921 intel_bo_unreference(tex->bo);
922
923 tex->bo = bo;
924 tex->tiling = tiling;
925 tex->bo_stride = pitch;
926
927 return true;
928 }
929
930 static void
931 tex_destroy(struct ilo_texture *tex)
932 {
933 if (tex->separate_s8)
934 tex_destroy(tex->separate_s8);
935
936 intel_bo_unreference(tex->bo);
937 tex_free_slices(tex);
938 FREE(tex);
939 }
940
941 static struct pipe_resource *
942 tex_create(struct pipe_screen *screen,
943 const struct pipe_resource *templ,
944 const struct winsys_handle *handle)
945 {
946 struct tex_layout layout;
947 struct ilo_texture *tex;
948
949 tex = CALLOC_STRUCT(ilo_texture);
950 if (!tex)
951 return NULL;
952
953 tex->base = *templ;
954 tex->base.screen = screen;
955 pipe_reference_init(&tex->base.reference, 1);
956
957 if (!tex_alloc_slices(tex)) {
958 FREE(tex);
959 return NULL;
960 }
961
962 tex->imported = (handle != NULL);
963
964 if (tex->base.bind & (PIPE_BIND_DEPTH_STENCIL |
965 PIPE_BIND_RENDER_TARGET))
966 tex->bo_flags |= INTEL_ALLOC_FOR_RENDER;
967
968 tex_layout_init(&layout, screen, templ, tex->slice_offsets);
969
970 switch (templ->target) {
971 case PIPE_TEXTURE_1D:
972 case PIPE_TEXTURE_2D:
973 case PIPE_TEXTURE_CUBE:
974 case PIPE_TEXTURE_RECT:
975 case PIPE_TEXTURE_1D_ARRAY:
976 case PIPE_TEXTURE_2D_ARRAY:
977 case PIPE_TEXTURE_CUBE_ARRAY:
978 tex_layout_2d(&layout);
979 break;
980 case PIPE_TEXTURE_3D:
981 tex_layout_3d(&layout);
982 break;
983 default:
984 assert(!"unknown resource target");
985 break;
986 }
987
988 tex_layout_validate(&layout);
989
990 /* make sure the bo can be mapped through GTT if tiled */
991 if (layout.tiling != INTEL_TILING_NONE) {
992 /*
993 * Usually only the first 256MB of the GTT is mappable.
994 *
995 * See also how intel_context::max_gtt_map_object_size is calculated.
996 */
997 const size_t mappable_gtt_size = 256 * 1024 * 1024;
998 const size_t size = tex_layout_estimate_size(&layout);
999
1000 /* be conservative */
1001 if (size > mappable_gtt_size / 4)
1002 tex_layout_force_linear(&layout);
1003 }
1004
1005 tex_layout_apply(&layout, tex);
1006
1007 if (!tex_create_bo(tex, handle)) {
1008 tex_free_slices(tex);
1009 FREE(tex);
1010 return NULL;
1011 }
1012
1013 /* allocate separate stencil resource */
1014 if (layout.separate_stencil) {
1015 struct pipe_resource s8_templ = *layout.templ;
1016 struct pipe_resource *s8;
1017
1018 /*
1019 * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
1020 * tilings. But that should be fine since it will never be bound as the
1021 * stencil buffer, and our transfer code can handle all tilings.
1022 */
1023 s8_templ.format = PIPE_FORMAT_S8_UINT;
1024
1025 s8 = screen->resource_create(screen, &s8_templ);
1026 if (!s8) {
1027 tex_destroy(tex);
1028 return NULL;
1029 }
1030
1031 tex->separate_s8 = ilo_texture(s8);
1032
1033 assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
1034 }
1035
1036 return &tex->base;
1037 }
1038
1039 static bool
1040 tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
1041 {
1042 struct ilo_screen *is = ilo_screen(tex->base.screen);
1043 int err;
1044
1045 err = intel_winsys_export_handle(is->winsys, tex->bo,
1046 tex->tiling, tex->bo_stride, handle);
1047
1048 return !err;
1049 }
1050
1051 /**
1052 * Estimate the texture size. For large textures, the errors should be pretty
1053 * small.
1054 */
1055 static size_t
1056 tex_estimate_size(struct pipe_screen *screen,
1057 const struct pipe_resource *templ)
1058 {
1059 struct tex_layout layout;
1060
1061 tex_layout_init(&layout, screen, templ, NULL);
1062
1063 switch (templ->target) {
1064 case PIPE_TEXTURE_3D:
1065 tex_layout_3d(&layout);
1066 break;
1067 default:
1068 tex_layout_2d(&layout);
1069 break;
1070 }
1071
1072 tex_layout_validate(&layout);
1073
1074 return tex_layout_estimate_size(&layout);
1075 }
1076
1077 static bool
1078 buf_create_bo(struct ilo_buffer *buf)
1079 {
1080 struct ilo_screen *is = ilo_screen(buf->base.screen);
1081 const char *name;
1082 struct intel_bo *bo;
1083
1084 switch (buf->base.bind) {
1085 case PIPE_BIND_VERTEX_BUFFER:
1086 name = "vertex buffer";
1087 break;
1088 case PIPE_BIND_INDEX_BUFFER:
1089 name = "index buffer";
1090 break;
1091 case PIPE_BIND_CONSTANT_BUFFER:
1092 name = "constant buffer";
1093 break;
1094 case PIPE_BIND_STREAM_OUTPUT:
1095 name = "stream output";
1096 break;
1097 default:
1098 name = "unknown buffer";
1099 break;
1100 }
1101
1102 bo = intel_winsys_alloc_buffer(is->winsys,
1103 name, buf->bo_size, buf->bo_flags);
1104 if (!bo)
1105 return false;
1106
1107 if (buf->bo)
1108 intel_bo_unreference(buf->bo);
1109
1110 buf->bo = bo;
1111
1112 return true;
1113 }
1114
1115 static void
1116 buf_destroy(struct ilo_buffer *buf)
1117 {
1118 intel_bo_unreference(buf->bo);
1119 FREE(buf);
1120 }
1121
1122 static struct pipe_resource *
1123 buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
1124 {
1125 struct ilo_buffer *buf;
1126
1127 buf = CALLOC_STRUCT(ilo_buffer);
1128 if (!buf)
1129 return NULL;
1130
1131 buf->base = *templ;
1132 buf->base.screen = screen;
1133 pipe_reference_init(&buf->base.reference, 1);
1134
1135 buf->bo_size = templ->width0;
1136 buf->bo_flags = 0;
1137
1138 if (!buf_create_bo(buf)) {
1139 FREE(buf);
1140 return NULL;
1141 }
1142
1143 return &buf->base;
1144 }
1145
1146 static boolean
1147 ilo_can_create_resource(struct pipe_screen *screen,
1148 const struct pipe_resource *templ)
1149 {
1150 /*
1151 * We do not know if we will fail until we try to allocate the bo.
1152 * So just set a limit on the texture size.
1153 */
1154 const size_t max_size = 1 * 1024 * 1024 * 1024;
1155 size_t size;
1156
1157 if (templ->target == PIPE_BUFFER)
1158 size = templ->width0;
1159 else
1160 size = tex_estimate_size(screen, templ);
1161
1162 return (size <= max_size);
1163 }
1164
1165 static struct pipe_resource *
1166 ilo_resource_create(struct pipe_screen *screen,
1167 const struct pipe_resource *templ)
1168 {
1169 if (templ->target == PIPE_BUFFER)
1170 return buf_create(screen, templ);
1171 else
1172 return tex_create(screen, templ, NULL);
1173 }
1174
1175 static struct pipe_resource *
1176 ilo_resource_from_handle(struct pipe_screen *screen,
1177 const struct pipe_resource *templ,
1178 struct winsys_handle *handle)
1179 {
1180 if (templ->target == PIPE_BUFFER)
1181 return NULL;
1182 else
1183 return tex_create(screen, templ, handle);
1184 }
1185
1186 static boolean
1187 ilo_resource_get_handle(struct pipe_screen *screen,
1188 struct pipe_resource *res,
1189 struct winsys_handle *handle)
1190 {
1191 if (res->target == PIPE_BUFFER)
1192 return false;
1193 else
1194 return tex_get_handle(ilo_texture(res), handle);
1195
1196 }
1197
1198 static void
1199 ilo_resource_destroy(struct pipe_screen *screen,
1200 struct pipe_resource *res)
1201 {
1202 if (res->target == PIPE_BUFFER)
1203 buf_destroy(ilo_buffer(res));
1204 else
1205 tex_destroy(ilo_texture(res));
1206 }
1207
1208 /**
1209 * Initialize resource-related functions.
1210 */
1211 void
1212 ilo_init_resource_functions(struct ilo_screen *is)
1213 {
1214 is->base.can_create_resource = ilo_can_create_resource;
1215 is->base.resource_create = ilo_resource_create;
1216 is->base.resource_from_handle = ilo_resource_from_handle;
1217 is->base.resource_get_handle = ilo_resource_get_handle;
1218 is->base.resource_destroy = ilo_resource_destroy;
1219 }
1220
/**
 * Allocate a bo for the buffer; a thin public wrapper around
 * buf_create_bo().  Return true on success.
 */
bool
ilo_buffer_alloc_bo(struct ilo_buffer *buf)
{
   const bool allocated = buf_create_bo(buf);

   return allocated;
}
1226
1227 bool
1228 ilo_texture_alloc_bo(struct ilo_texture *tex)
1229 {
1230 /* a shared bo cannot be reallocated */
1231 if (tex->imported)
1232 return false;
1233
1234 return tex_create_bo(tex, NULL);
1235 }
1236
1237 /**
1238 * Return the offset (in bytes) to a slice within the bo.
1239 *
1240 * The returned offset is aligned to tile size. Since slices are not
1241 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1242 * from the tile origin to the slice are also returned. X offset is always a
1243 * multiple of 4 and Y offset is always a multiple of 2.
1244 */
1245 unsigned
1246 ilo_texture_get_slice_offset(const struct ilo_texture *tex,
1247 int level, int slice,
1248 unsigned *x_offset, unsigned *y_offset)
1249 {
1250 unsigned tile_w, tile_h, tile_size, row_size;
1251 unsigned x, y, slice_offset;
1252
1253 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1254
1255 switch (tex->tiling) {
1256 case INTEL_TILING_NONE:
1257 /* W-tiled */
1258 if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
1259 tile_w = 64;
1260 tile_h = 64;
1261 }
1262 else {
1263 tile_w = 1;
1264 tile_h = 1;
1265 }
1266 break;
1267 case INTEL_TILING_X:
1268 tile_w = 512;
1269 tile_h = 8;
1270 break;
1271 case INTEL_TILING_Y:
1272 tile_w = 128;
1273 tile_h = 32;
1274 break;
1275 default:
1276 assert(!"unknown tiling");
1277 tile_w = 1;
1278 tile_h = 1;
1279 break;
1280 }
1281
1282 tile_size = tile_w * tile_h;
1283 row_size = tex->bo_stride * tile_h;
1284
1285 /* in bytes */
1286 x = tex->slice_offsets[level][slice].x / tex->block_width * tex->bo_cpp;
1287 y = tex->slice_offsets[level][slice].y / tex->block_height;
1288 slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1289
1290 /*
1291 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1292 * aligned at this point.
1293 */
1294 assert(slice_offset % tile_size == 0);
1295
1296 /*
1297 * because of the possible values of align_i and align_j in
1298 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1299 * 4 and y_offset is guaranteed to be a multiple of 2.
1300 */
1301 if (x_offset) {
1302 /* in pixels */
1303 x = (x % tile_w) / tex->bo_cpp * tex->block_width;
1304 assert(x % 4 == 0);
1305
1306 *x_offset = x;
1307 }
1308
1309 if (y_offset) {
1310 /* in pixels */
1311 y = (y % tile_h) * tex->block_height;
1312 assert(y % 2 == 0);
1313
1314 *y_offset = y;
1315 }
1316
1317 return slice_offset;
1318 }