ilo: try unblocking a transfer with a staging bo
[mesa.git] / src / gallium / drivers / ilo / ilo_resource.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "ilo_screen.h"
29 #include "ilo_resource.h"
30
/* PIPE_BIND_CUSTOM is repurposed to mark a resource as an MCS surface */
#define ILO_BIND_MCS (PIPE_BIND_CUSTOM)
33
34 struct tex_layout {
35 const struct ilo_dev_info *dev;
36 const struct pipe_resource *templ;
37
38 bool has_depth, has_stencil;
39 bool hiz, separate_stencil;
40
41 enum pipe_format format;
42 unsigned block_width, block_height, block_size;
43 bool compressed;
44
45 enum intel_tiling_mode tiling;
46 unsigned valid_tilings; /* bitmask of valid tiling modes */
47
48 bool array_spacing_full;
49 bool interleaved;
50
51 struct {
52 int w, h, d;
53 struct ilo_texture_slice *slices;
54 } levels[PIPE_MAX_TEXTURE_LEVELS];
55
56 int align_i, align_j;
57 int qpitch;
58
59 int width, height;
60
61 int bo_stride, bo_height;
62 int hiz_stride, hiz_height;
63 };
64
/*
 * Upper bound, in bytes, on the size of any surface.
 *
 * From the Ivy Bridge PRM, volume 1 part 1, page 105:
 *
 *     "In addition to restrictions on maximum height, width, and depth,
 *      surfaces are also restricted to a maximum size in bytes. This
 *      maximum is 2 GB for all products and all surface types."
 */
static const size_t max_resource_size = (size_t) 1 << 31;
73
74 static const char *
75 resource_get_bo_name(const struct pipe_resource *templ)
76 {
77 static const char *target_names[PIPE_MAX_TEXTURE_TYPES] = {
78 [PIPE_BUFFER] = "buf",
79 [PIPE_TEXTURE_1D] = "tex-1d",
80 [PIPE_TEXTURE_2D] = "tex-2d",
81 [PIPE_TEXTURE_3D] = "tex-3d",
82 [PIPE_TEXTURE_CUBE] = "tex-cube",
83 [PIPE_TEXTURE_RECT] = "tex-rect",
84 [PIPE_TEXTURE_1D_ARRAY] = "tex-1d-array",
85 [PIPE_TEXTURE_2D_ARRAY] = "tex-2d-array",
86 [PIPE_TEXTURE_CUBE_ARRAY] = "tex-cube-array",
87 };
88 const char *name = target_names[templ->target];
89
90 if (templ->target == PIPE_BUFFER) {
91 switch (templ->bind) {
92 case PIPE_BIND_VERTEX_BUFFER:
93 name = "buf-vb";
94 break;
95 case PIPE_BIND_INDEX_BUFFER:
96 name = "buf-ib";
97 break;
98 case PIPE_BIND_CONSTANT_BUFFER:
99 name = "buf-cb";
100 break;
101 case PIPE_BIND_STREAM_OUTPUT:
102 name = "buf-so";
103 break;
104 default:
105 break;
106 }
107 }
108
109 return name;
110 }
111
112 static enum intel_domain_flag
113 resource_get_bo_initial_domain(const struct pipe_resource *templ)
114 {
115 return (templ->bind & (PIPE_BIND_DEPTH_STENCIL |
116 PIPE_BIND_RENDER_TARGET |
117 PIPE_BIND_STREAM_OUTPUT)) ?
118 INTEL_DOMAIN_RENDER : 0;
119 }
120
121 static void
122 tex_layout_init_qpitch(struct tex_layout *layout)
123 {
124 const struct pipe_resource *templ = layout->templ;
125 int h0, h1;
126
127 if (templ->array_size <= 1)
128 return;
129
130 h0 = align(layout->levels[0].h, layout->align_j);
131
132 if (!layout->array_spacing_full) {
133 layout->qpitch = h0;
134 return;
135 }
136
137 h1 = align(layout->levels[1].h, layout->align_j);
138
139 /*
140 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
141 *
142 * "The following equation is used for surface formats other than
143 * compressed textures:
144 *
145 * QPitch = (h0 + h1 + 11j)"
146 *
147 * "The equation for compressed textures (BC* and FXT1 surface formats)
148 * follows:
149 *
150 * QPitch = (h0 + h1 + 11j) / 4"
151 *
152 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
153 * value calculated in the equation above, for every other odd Surface
154 * Height starting from 1 i.e. 1,5,9,13"
155 *
156 * From the Ivy Bridge PRM, volume 1 part 1, page 111-112:
157 *
158 * "If Surface Array Spacing is set to ARYSPC_FULL (note that the depth
159 * buffer and stencil buffer have an implied value of ARYSPC_FULL):
160 *
161 * QPitch = (h0 + h1 + 12j)
162 * QPitch = (h0 + h1 + 12j) / 4 (compressed)
163 *
164 * (There are many typos or missing words here...)"
165 *
166 * To access the N-th slice, an offset of (Stride * QPitch * N) is added to
167 * the base address. The PRM divides QPitch by 4 for compressed formats
168 * because the block height for those formats are 4, and it wants QPitch to
169 * mean the number of memory rows, as opposed to texel rows, between
170 * slices. Since we use texel rows in tex->slice_offsets, we do not need
171 * to divide QPitch by 4.
172 */
173 layout->qpitch = h0 + h1 +
174 ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * layout->align_j;
175
176 if (layout->dev->gen == ILO_GEN(6) && templ->nr_samples > 1 &&
177 templ->height0 % 4 == 1)
178 layout->qpitch += 4;
179 }
180
181 static void
182 tex_layout_init_alignments(struct tex_layout *layout)
183 {
184 const struct pipe_resource *templ = layout->templ;
185
186 /*
187 * From the Sandy Bridge PRM, volume 1 part 1, page 113:
188 *
189 * "surface format align_i align_j
190 * YUV 4:2:2 formats 4 *see below
191 * BC1-5 4 4
192 * FXT1 8 4
193 * all other formats 4 *see below"
194 *
195 * "- align_j = 4 for any depth buffer
196 * - align_j = 2 for separate stencil buffer
197 * - align_j = 4 for any render target surface is multisampled (4x)
198 * - align_j = 4 for any render target surface with Surface Vertical
199 * Alignment = VALIGN_4
200 * - align_j = 2 for any render target surface with Surface Vertical
201 * Alignment = VALIGN_2
202 * - align_j = 2 for all other render target surface
203 * - align_j = 2 for any sampling engine surface with Surface Vertical
204 * Alignment = VALIGN_2
205 * - align_j = 4 for any sampling engine surface with Surface Vertical
206 * Alignment = VALIGN_4"
207 *
208 * From the Sandy Bridge PRM, volume 4 part 1, page 86:
209 *
210 * "This field (Surface Vertical Alignment) must be set to VALIGN_2 if
211 * the Surface Format is 96 bits per element (BPE)."
212 *
213 * They can be rephrased as
214 *
215 * align_i align_j
216 * compressed formats block width block height
217 * PIPE_FORMAT_S8_UINT 4 2
218 * other depth/stencil formats 4 4
219 * 4x multisampled 4 4
220 * bpp 96 4 2
221 * others 4 2 or 4
222 */
223
224 /*
225 * From the Ivy Bridge PRM, volume 1 part 1, page 110:
226 *
227 * "surface defined by surface format align_i align_j
228 * 3DSTATE_DEPTH_BUFFER D16_UNORM 8 4
229 * not D16_UNORM 4 4
230 * 3DSTATE_STENCIL_BUFFER N/A 8 8
231 * SURFACE_STATE BC*, ETC*, EAC* 4 4
232 * FXT1 8 4
233 * all others (set by SURFACE_STATE)"
234 *
235 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
236 *
237 * "- This field (Surface Vertical Aligment) is intended to be set to
238 * VALIGN_4 if the surface was rendered as a depth buffer, for a
239 * multisampled (4x) render target, or for a multisampled (8x)
240 * render target, since these surfaces support only alignment of 4.
241 * - Use of VALIGN_4 for other surfaces is supported, but uses more
242 * memory.
243 * - This field must be set to VALIGN_4 for all tiled Y Render Target
244 * surfaces.
245 * - Value of 1 is not supported for format YCRCB_NORMAL (0x182),
246 * YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY (0x190)
247 * - If Number of Multisamples is not MULTISAMPLECOUNT_1, this field
248 * must be set to VALIGN_4."
249 * - VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
250 *
251 * "- This field (Surface Horizontal Aligment) is intended to be set to
252 * HALIGN_8 only if the surface was rendered as a depth buffer with
253 * Z16 format or a stencil buffer, since these surfaces support only
254 * alignment of 8.
255 * - Use of HALIGN_8 for other surfaces is supported, but uses more
256 * memory.
257 * - This field must be set to HALIGN_4 if the Surface Format is BC*.
258 * - This field must be set to HALIGN_8 if the Surface Format is
259 * FXT1."
260 *
261 * They can be rephrased as
262 *
263 * align_i align_j
264 * compressed formats block width block height
265 * PIPE_FORMAT_Z16_UNORM 8 4
266 * PIPE_FORMAT_S8_UINT 8 8
267 * other depth/stencil formats 4 or 8 4
268 * 2x or 4x multisampled 4 or 8 4
269 * tiled Y 4 or 8 4 (if rt)
270 * PIPE_FORMAT_R32G32B32_FLOAT 4 or 8 2
271 * others 4 or 8 2 or 4
272 */
273
274 if (layout->compressed) {
275 /* this happens to be the case */
276 layout->align_i = layout->block_width;
277 layout->align_j = layout->block_height;
278 }
279 else if (layout->has_depth || layout->has_stencil) {
280 if (layout->dev->gen >= ILO_GEN(7)) {
281 switch (layout->format) {
282 case PIPE_FORMAT_Z16_UNORM:
283 layout->align_i = 8;
284 layout->align_j = 4;
285 break;
286 case PIPE_FORMAT_S8_UINT:
287 layout->align_i = 8;
288 layout->align_j = 8;
289 break;
290 default:
291 layout->align_i = 4;
292 layout->align_j = 4;
293 break;
294 }
295 }
296 else {
297 switch (layout->format) {
298 case PIPE_FORMAT_S8_UINT:
299 layout->align_i = 4;
300 layout->align_j = 2;
301 break;
302 default:
303 layout->align_i = 4;
304 layout->align_j = 4;
305 break;
306 }
307 }
308 }
309 else {
310 const bool valign_4 = (templ->nr_samples > 1) ||
311 (layout->dev->gen >= ILO_GEN(7) &&
312 layout->tiling == INTEL_TILING_Y &&
313 (templ->bind & PIPE_BIND_RENDER_TARGET));
314
315 if (valign_4)
316 assert(layout->block_size != 12);
317
318 layout->align_i = 4;
319 layout->align_j = (valign_4) ? 4 : 2;
320 }
321
322 /*
323 * the fact that align i and j are multiples of block width and height
324 * respectively is what makes the size of the bo a multiple of the block
325 * size, slices start at block boundaries, and many of the computations
326 * work.
327 */
328 assert(layout->align_i % layout->block_width == 0);
329 assert(layout->align_j % layout->block_height == 0);
330
331 /* make sure align() works */
332 assert(util_is_power_of_two(layout->align_i) &&
333 util_is_power_of_two(layout->align_j));
334 assert(util_is_power_of_two(layout->block_width) &&
335 util_is_power_of_two(layout->block_height));
336 }
337
338 static void
339 tex_layout_init_levels(struct tex_layout *layout)
340 {
341 const struct pipe_resource *templ = layout->templ;
342 int last_level, lv;
343
344 last_level = templ->last_level;
345
346 /* need at least 2 levels to compute full qpitch */
347 if (last_level == 0 && templ->array_size > 1 && layout->array_spacing_full)
348 last_level++;
349
350 /* compute mip level sizes */
351 for (lv = 0; lv <= last_level; lv++) {
352 int w, h, d;
353
354 w = u_minify(templ->width0, lv);
355 h = u_minify(templ->height0, lv);
356 d = u_minify(templ->depth0, lv);
357
358 /*
359 * From the Sandy Bridge PRM, volume 1 part 1, page 114:
360 *
361 * "The dimensions of the mip maps are first determined by applying
362 * the sizing algorithm presented in Non-Power-of-Two Mipmaps
363 * above. Then, if necessary, they are padded out to compression
364 * block boundaries."
365 */
366 w = align(w, layout->block_width);
367 h = align(h, layout->block_height);
368
369 /*
370 * From the Sandy Bridge PRM, volume 1 part 1, page 111:
371 *
372 * "If the surface is multisampled (4x), these values must be
373 * adjusted as follows before proceeding:
374 *
375 * W_L = ceiling(W_L / 2) * 4
376 * H_L = ceiling(H_L / 2) * 4"
377 *
378 * From the Ivy Bridge PRM, volume 1 part 1, page 108:
379 *
380 * "If the surface is multisampled and it is a depth or stencil
381 * surface or Multisampled Surface StorageFormat in SURFACE_STATE
382 * is MSFMT_DEPTH_STENCIL, W_L and H_L must be adjusted as follows
383 * before proceeding:
384 *
385 * #samples W_L = H_L =
386 * 2 ceiling(W_L / 2) * 4 HL [no adjustment]
387 * 4 ceiling(W_L / 2) * 4 ceiling(H_L / 2) * 4
388 * 8 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 4
389 * 16 ceiling(W_L / 2) * 8 ceiling(H_L / 2) * 8"
390 *
391 * For interleaved samples (4x), where pixels
392 *
393 * (x, y ) (x+1, y )
394 * (x, y+1) (x+1, y+1)
395 *
396 * would be is occupied by
397 *
398 * (x, y , si0) (x+1, y , si0) (x, y , si1) (x+1, y , si1)
399 * (x, y+1, si0) (x+1, y+1, si0) (x, y+1, si1) (x+1, y+1, si1)
400 * (x, y , si2) (x+1, y , si2) (x, y , si3) (x+1, y , si3)
401 * (x, y+1, si2) (x+1, y+1, si2) (x, y+1, si3) (x+1, y+1, si3)
402 *
403 * Thus the need to
404 *
405 * w = align(w, 2) * 2;
406 * y = align(y, 2) * 2;
407 */
408 if (layout->interleaved) {
409 switch (templ->nr_samples) {
410 case 0:
411 case 1:
412 break;
413 case 2:
414 w = align(w, 2) * 2;
415 break;
416 case 4:
417 w = align(w, 2) * 2;
418 h = align(h, 2) * 2;
419 break;
420 case 8:
421 w = align(w, 2) * 4;
422 h = align(h, 2) * 2;
423 break;
424 case 16:
425 w = align(w, 2) * 4;
426 h = align(h, 2) * 4;
427 break;
428 default:
429 assert(!"unsupported sample count");
430 break;
431 }
432 }
433
434 layout->levels[lv].w = w;
435 layout->levels[lv].h = h;
436 layout->levels[lv].d = d;
437 }
438 }
439
440 static void
441 tex_layout_init_spacing(struct tex_layout *layout)
442 {
443 const struct pipe_resource *templ = layout->templ;
444
445 if (layout->dev->gen >= ILO_GEN(7)) {
446 /*
447 * It is not explicitly states, but render targets are expected to be
448 * UMS/CMS (samples non-interleaved) and depth/stencil buffers are
449 * expected to be IMS (samples interleaved).
450 *
451 * See "Multisampled Surface Storage Format" field of SURFACE_STATE.
452 */
453 if (layout->has_depth || layout->has_stencil) {
454 layout->interleaved = true;
455
456 /*
457 * From the Ivy Bridge PRM, volume 1 part 1, page 111:
458 *
459 * "note that the depth buffer and stencil buffer have an implied
460 * value of ARYSPC_FULL"
461 */
462 layout->array_spacing_full = true;
463 }
464 else {
465 layout->interleaved = false;
466
467 /*
468 * From the Ivy Bridge PRM, volume 4 part 1, page 66:
469 *
470 * "If Multisampled Surface Storage Format is MSFMT_MSS and
471 * Number of Multisamples is not MULTISAMPLECOUNT_1, this field
472 * (Surface Array Spacing) must be set to ARYSPC_LOD0."
473 *
474 * As multisampled resources are not mipmapped, we never use
475 * ARYSPC_FULL for them.
476 */
477 if (templ->nr_samples > 1)
478 assert(templ->last_level == 0);
479 layout->array_spacing_full = (templ->last_level > 0);
480 }
481 }
482 else {
483 /* GEN6 supports only interleaved samples */
484 layout->interleaved = true;
485
486 /*
487 * From the Sandy Bridge PRM, volume 1 part 1, page 115:
488 *
489 * "The separate stencil buffer does not support mip mapping, thus
490 * the storage for LODs other than LOD 0 is not needed. The
491 * following QPitch equation applies only to the separate stencil
492 * buffer:
493 *
494 * QPitch = h_0"
495 *
496 * GEN6 does not support compact spacing otherwise.
497 */
498 layout->array_spacing_full = (layout->format != PIPE_FORMAT_S8_UINT);
499 }
500 }
501
502 static void
503 tex_layout_init_tiling(struct tex_layout *layout)
504 {
505 const struct pipe_resource *templ = layout->templ;
506 const enum pipe_format format = layout->format;
507 const unsigned tile_none = 1 << INTEL_TILING_NONE;
508 const unsigned tile_x = 1 << INTEL_TILING_X;
509 const unsigned tile_y = 1 << INTEL_TILING_Y;
510 unsigned valid_tilings = tile_none | tile_x | tile_y;
511
512 /*
513 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
514 *
515 * "Display/Overlay Y-Major not supported.
516 * X-Major required for Async Flips"
517 */
518 if (unlikely(templ->bind & PIPE_BIND_SCANOUT))
519 valid_tilings &= tile_x;
520
521 /*
522 * From the Sandy Bridge PRM, volume 3 part 2, page 158:
523 *
524 * "The cursor surface address must be 4K byte aligned. The cursor must
525 * be in linear memory, it cannot be tiled."
526 */
527 if (unlikely(templ->bind & (PIPE_BIND_CURSOR | PIPE_BIND_LINEAR)))
528 valid_tilings &= tile_none;
529
530 /*
531 * From the Ivy Bridge PRM, volume 4 part 1, page 76:
532 *
533 * "The MCS surface must be stored as Tile Y."
534 */
535 if (templ->bind & ILO_BIND_MCS)
536 valid_tilings &= tile_y;
537
538 /*
539 * From the Sandy Bridge PRM, volume 2 part 1, page 318:
540 *
541 * "[DevSNB+]: This field (Tiled Surface) must be set to TRUE. Linear
542 * Depth Buffer is not supported."
543 *
544 * "The Depth Buffer, if tiled, must use Y-Major tiling."
545 *
546 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
547 *
548 * "W-Major Tile Format is used for separate stencil."
549 *
550 * Since the HW does not support W-tiled fencing, we have to do it in the
551 * driver.
552 */
553 if (templ->bind & PIPE_BIND_DEPTH_STENCIL) {
554 switch (format) {
555 case PIPE_FORMAT_S8_UINT:
556 valid_tilings &= tile_none;
557 break;
558 default:
559 valid_tilings &= tile_y;
560 break;
561 }
562 }
563
564 if (templ->bind & PIPE_BIND_RENDER_TARGET) {
565 /*
566 * From the Sandy Bridge PRM, volume 1 part 2, page 32:
567 *
568 * "NOTE: 128BPE Format Color buffer ( render target ) MUST be
569 * either TileX or Linear."
570 */
571 if (layout->block_size == 16)
572 valid_tilings &= ~tile_y;
573
574 /*
575 * From the Ivy Bridge PRM, volume 4 part 1, page 63:
576 *
577 * "This field (Surface Vertical Aligment) must be set to VALIGN_4
578 * for all tiled Y Render Target surfaces."
579 *
580 * "VALIGN_4 is not supported for surface format R32G32B32_FLOAT."
581 */
582 if (layout->dev->gen >= ILO_GEN(7) && layout->block_size == 12)
583 valid_tilings &= ~tile_y;
584 }
585
586 /* no conflicting binding flags */
587 assert(valid_tilings);
588
589 layout->valid_tilings = valid_tilings;
590
591 if (templ->bind & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_SAMPLER_VIEW)) {
592 /*
593 * heuristically set a minimum width/height for enabling tiling
594 */
595 if (templ->width0 < 64 && (valid_tilings & ~tile_x))
596 valid_tilings &= ~tile_x;
597
598 if ((templ->width0 < 32 || templ->height0 < 16) &&
599 (templ->width0 < 16 || templ->height0 < 32) &&
600 (valid_tilings & ~tile_y))
601 valid_tilings &= ~tile_y;
602 }
603 else {
604 /* force linear if we are not sure where the texture is bound to */
605 if (valid_tilings & tile_none)
606 valid_tilings &= tile_none;
607 }
608
609 /* prefer tiled over linear */
610 if (valid_tilings & tile_y)
611 layout->tiling = INTEL_TILING_Y;
612 else if (valid_tilings & tile_x)
613 layout->tiling = INTEL_TILING_X;
614 else
615 layout->tiling = INTEL_TILING_NONE;
616 }
617
618 static void
619 tex_layout_init_format(struct tex_layout *layout)
620 {
621 const struct pipe_resource *templ = layout->templ;
622 enum pipe_format format;
623
624 switch (templ->format) {
625 case PIPE_FORMAT_ETC1_RGB8:
626 format = PIPE_FORMAT_R8G8B8X8_UNORM;
627 break;
628 case PIPE_FORMAT_Z24_UNORM_S8_UINT:
629 if (layout->separate_stencil)
630 format = PIPE_FORMAT_Z24X8_UNORM;
631 else
632 format = templ->format;
633 break;
634 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
635 if (layout->separate_stencil)
636 format = PIPE_FORMAT_Z32_FLOAT;
637 else
638 format = templ->format;
639 break;
640 default:
641 format = templ->format;
642 break;
643 }
644
645 layout->format = format;
646
647 layout->block_width = util_format_get_blockwidth(format);
648 layout->block_height = util_format_get_blockheight(format);
649 layout->block_size = util_format_get_blocksize(format);
650 layout->compressed = util_format_is_compressed(format);
651 }
652
653 static void
654 tex_layout_init_hiz(struct tex_layout *layout)
655 {
656 const struct pipe_resource *templ = layout->templ;
657 const struct util_format_description *desc;
658
659 desc = util_format_description(templ->format);
660 layout->has_depth = util_format_has_depth(desc);
661 layout->has_stencil = util_format_has_stencil(desc);
662
663 if (!layout->has_depth)
664 return;
665
666 layout->hiz = true;
667
668 /* no point in having HiZ */
669 if (templ->usage == PIPE_USAGE_STAGING)
670 layout->hiz = false;
671
672 if (layout->dev->gen == ILO_GEN(6)) {
673 /*
674 * From the Sandy Bridge PRM, volume 2 part 1, page 312:
675 *
676 * "The hierarchical depth buffer does not support the LOD field, it
677 * is assumed by hardware to be zero. A separate hierarachical
678 * depth buffer is required for each LOD used, and the
679 * corresponding buffer's state delivered to hardware each time a
680 * new depth buffer state with modified LOD is delivered."
681 *
682 * But we have a stronger requirement. Because of layer offsetting
683 * (check out the callers of ilo_texture_get_slice_offset()), we already
684 * have to require the texture to be non-mipmapped and non-array.
685 */
686 if (templ->last_level > 0 || templ->array_size > 1 || templ->depth0 > 1)
687 layout->hiz = false;
688 }
689
690 if (ilo_debug & ILO_DEBUG_NOHIZ)
691 layout->hiz = false;
692
693 if (layout->has_stencil) {
694 /*
695 * From the Sandy Bridge PRM, volume 2 part 1, page 317:
696 *
697 * "This field (Separate Stencil Buffer Enable) must be set to the
698 * same value (enabled or disabled) as Hierarchical Depth Buffer
699 * Enable."
700 *
701 * GEN7+ requires separate stencil buffers.
702 */
703 if (layout->dev->gen >= ILO_GEN(7))
704 layout->separate_stencil = true;
705 else
706 layout->separate_stencil = layout->hiz;
707
708 if (layout->separate_stencil)
709 layout->has_stencil = false;
710 }
711 }
712
713 static bool
714 tex_layout_init(struct tex_layout *layout,
715 struct pipe_screen *screen,
716 const struct pipe_resource *templ,
717 struct ilo_texture_slice **slices)
718 {
719 struct ilo_screen *is = ilo_screen(screen);
720
721 memset(layout, 0, sizeof(*layout));
722
723 layout->dev = &is->dev;
724 layout->templ = templ;
725
726 /* note that there are dependencies between these functions */
727 tex_layout_init_hiz(layout);
728 tex_layout_init_format(layout);
729 tex_layout_init_tiling(layout);
730 tex_layout_init_spacing(layout);
731 tex_layout_init_levels(layout);
732 tex_layout_init_alignments(layout);
733 tex_layout_init_qpitch(layout);
734
735 if (templ->flags & PIPE_RESOURCE_FLAG_MAP_PERSISTENT) {
736 /* require on-the-fly tiling/untiling or format conversion */
737 if (layout->separate_stencil ||
738 layout->format == PIPE_FORMAT_S8_UINT ||
739 layout->format != templ->format)
740 return false;
741 }
742
743 if (slices) {
744 int lv;
745
746 for (lv = 0; lv <= templ->last_level; lv++)
747 layout->levels[lv].slices = slices[lv];
748 }
749
750 return true;
751 }
752
753 static void
754 tex_layout_align(struct tex_layout *layout)
755 {
756 int align_w = 1, align_h = 1, pad_h = 0;
757
758 /*
759 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
760 *
761 * "To determine the necessary padding on the bottom and right side of
762 * the surface, refer to the table in Section 7.18.3.4 for the i and j
763 * parameters for the surface format in use. The surface must then be
764 * extended to the next multiple of the alignment unit size in each
765 * dimension, and all texels contained in this extended surface must
766 * have valid GTT entries."
767 *
768 * "For cube surfaces, an additional two rows of padding are required
769 * at the bottom of the surface. This must be ensured regardless of
770 * whether the surface is stored tiled or linear. This is due to the
771 * potential rotation of cache line orientation from memory to cache."
772 *
773 * "For compressed textures (BC* and FXT1 surface formats), padding at
774 * the bottom of the surface is to an even compressed row, which is
775 * equal to a multiple of 8 uncompressed texel rows. Thus, for padding
776 * purposes, these surfaces behave as if j = 8 only for surface
777 * padding purposes. The value of 4 for j still applies for mip level
778 * alignment and QPitch calculation."
779 */
780 if (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) {
781 align_w = MAX2(align_w, layout->align_i);
782 align_h = MAX2(align_h, layout->align_j);
783
784 if (layout->templ->target == PIPE_TEXTURE_CUBE)
785 pad_h += 2;
786
787 if (layout->compressed)
788 align_h = MAX2(align_h, layout->align_j * 2);
789 }
790
791 /*
792 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
793 *
794 * "If the surface contains an odd number of rows of data, a final row
795 * below the surface must be allocated."
796 */
797 if (layout->templ->bind & PIPE_BIND_RENDER_TARGET)
798 align_h = MAX2(align_h, 2);
799
800 /*
801 * Depth Buffer Clear/Resolve works in 8x4 sample blocks. In
802 * ilo_texture_can_enable_hiz(), we always return true for the first slice.
803 * To avoid out-of-bound access, we have to pad.
804 */
805 if (layout->hiz) {
806 align_w = MAX2(align_w, 8);
807 align_h = MAX2(align_h, 4);
808 }
809
810 layout->width = align(layout->width, align_w);
811 layout->height = align(layout->height + pad_h, align_h);
812 }
813
814 /**
815 * Layout a 2D texture.
816 */
817 static void
818 tex_layout_2d(struct tex_layout *layout)
819 {
820 const struct pipe_resource *templ = layout->templ;
821 unsigned int level_x, level_y, num_slices;
822 int lv;
823
824 level_x = 0;
825 level_y = 0;
826 for (lv = 0; lv <= templ->last_level; lv++) {
827 const unsigned int level_w = layout->levels[lv].w;
828 const unsigned int level_h = layout->levels[lv].h;
829 int slice;
830
831 /* set slice offsets */
832 if (layout->levels[lv].slices) {
833 for (slice = 0; slice < templ->array_size; slice++) {
834 layout->levels[lv].slices[slice].x = level_x;
835 /* slices are qpitch apart in Y-direction */
836 layout->levels[lv].slices[slice].y =
837 level_y + layout->qpitch * slice;
838 }
839 }
840
841 /* extend the size of the monolithic bo to cover this mip level */
842 if (layout->width < level_x + level_w)
843 layout->width = level_x + level_w;
844 if (layout->height < level_y + level_h)
845 layout->height = level_y + level_h;
846
847 /* MIPLAYOUT_BELOW */
848 if (lv == 1)
849 level_x += align(level_w, layout->align_i);
850 else
851 level_y += align(level_h, layout->align_j);
852 }
853
854 num_slices = templ->array_size;
855 /* samples of the same index are stored in a slice */
856 if (templ->nr_samples > 1 && !layout->interleaved)
857 num_slices *= templ->nr_samples;
858
859 /* we did not take slices into consideration in the computation above */
860 layout->height += layout->qpitch * (num_slices - 1);
861
862 tex_layout_align(layout);
863 }
864
865 /**
866 * Layout a 3D texture.
867 */
868 static void
869 tex_layout_3d(struct tex_layout *layout)
870 {
871 const struct pipe_resource *templ = layout->templ;
872 unsigned int level_y;
873 int lv;
874
875 level_y = 0;
876 for (lv = 0; lv <= templ->last_level; lv++) {
877 const unsigned int level_w = layout->levels[lv].w;
878 const unsigned int level_h = layout->levels[lv].h;
879 const unsigned int level_d = layout->levels[lv].d;
880 const unsigned int slice_pitch = align(level_w, layout->align_i);
881 const unsigned int slice_qpitch = align(level_h, layout->align_j);
882 const unsigned int num_slices_per_row = 1 << lv;
883 int slice;
884
885 for (slice = 0; slice < level_d; slice += num_slices_per_row) {
886 int i;
887
888 /* set slice offsets */
889 if (layout->levels[lv].slices) {
890 for (i = 0; i < num_slices_per_row && slice + i < level_d; i++) {
891 layout->levels[lv].slices[slice + i].x = slice_pitch * i;
892 layout->levels[lv].slices[slice + i].y = level_y;
893 }
894 }
895
896 /* move on to the next slice row */
897 level_y += slice_qpitch;
898 }
899
900 /* rightmost slice */
901 slice = MIN2(num_slices_per_row, level_d) - 1;
902
903 /* extend the size of the monolithic bo to cover this slice */
904 if (layout->width < slice_pitch * slice + level_w)
905 layout->width = slice_pitch * slice + level_w;
906 if (lv == templ->last_level)
907 layout->height = (level_y - slice_qpitch) + level_h;
908 }
909
910 tex_layout_align(layout);
911 }
912
913 /* note that this may force the texture to be linear */
914 static bool
915 tex_layout_calculate_bo_size(struct tex_layout *layout)
916 {
917 assert(layout->width % layout->block_width == 0);
918 assert(layout->height % layout->block_height == 0);
919 assert(layout->qpitch % layout->block_height == 0);
920
921 layout->bo_stride =
922 (layout->width / layout->block_width) * layout->block_size;
923 layout->bo_height = layout->height / layout->block_height;
924
925 while (true) {
926 int w = layout->bo_stride, h = layout->bo_height;
927 int align_w, align_h;
928
929 /*
930 * From the Haswell PRM, volume 5, page 163:
931 *
932 * "For linear surfaces, additional padding of 64 bytes is required
933 * at the bottom of the surface. This is in addition to the padding
934 * required above."
935 */
936 if (layout->dev->gen >= ILO_GEN(7.5) &&
937 (layout->templ->bind & PIPE_BIND_SAMPLER_VIEW) &&
938 layout->tiling == INTEL_TILING_NONE) {
939 layout->bo_height +=
940 (64 + layout->bo_stride - 1) / layout->bo_stride;
941 }
942
943 /*
944 * From the Sandy Bridge PRM, volume 4 part 1, page 81:
945 *
946 * "- For linear render target surfaces, the pitch must be a
947 * multiple of the element size for non-YUV surface formats.
948 * Pitch must be a multiple of 2 * element size for YUV surface
949 * formats.
950 * - For other linear surfaces, the pitch can be any multiple of
951 * bytes.
952 * - For tiled surfaces, the pitch must be a multiple of the tile
953 * width."
954 *
955 * Different requirements may exist when the bo is used in different
956 * places, but our alignments here should be good enough that we do not
957 * need to check layout->templ->bind.
958 */
959 switch (layout->tiling) {
960 case INTEL_TILING_X:
961 align_w = 512;
962 align_h = 8;
963 break;
964 case INTEL_TILING_Y:
965 align_w = 128;
966 align_h = 32;
967 break;
968 default:
969 if (layout->format == PIPE_FORMAT_S8_UINT) {
970 /*
971 * From the Sandy Bridge PRM, volume 1 part 2, page 22:
972 *
973 * "A 4KB tile is subdivided into 8-high by 8-wide array of
974 * Blocks for W-Major Tiles (W Tiles). Each Block is 8 rows by 8
975 * bytes."
976 *
977 * Since we asked for INTEL_TILING_NONE instead of the non-existent
978 * INTEL_TILING_W, we want to align to W tiles here.
979 */
980 align_w = 64;
981 align_h = 64;
982 }
983 else {
984 /* some good enough values */
985 align_w = 64;
986 align_h = 2;
987 }
988 break;
989 }
990
991 w = align(w, align_w);
992 h = align(h, align_h);
993
994 /* make sure the bo is mappable */
995 if (layout->tiling != INTEL_TILING_NONE) {
996 /*
997 * Usually only the first 256MB of the GTT is mappable.
998 *
999 * See also how intel_context::max_gtt_map_object_size is calculated.
1000 */
1001 const size_t mappable_gtt_size = 256 * 1024 * 1024;
1002
1003 /*
1004 * Be conservative. We may be able to switch from VALIGN_4 to
1005 * VALIGN_2 if the layout was Y-tiled, but let's keep it simple.
1006 */
1007 if (mappable_gtt_size / w / 4 < h) {
1008 if (layout->valid_tilings & (1 << INTEL_TILING_NONE)) {
1009 layout->tiling = INTEL_TILING_NONE;
1010 continue;
1011 }
1012 else {
1013 ilo_warn("cannot force texture to be linear\n");
1014 }
1015 }
1016 }
1017
1018 layout->bo_stride = w;
1019 layout->bo_height = h;
1020 break;
1021 }
1022
1023 return (layout->bo_height <= max_resource_size / layout->bo_stride);
1024 }
1025
1026 static void
1027 tex_layout_calculate_hiz_size(struct tex_layout *layout)
1028 {
1029 const struct pipe_resource *templ = layout->templ;
1030 const int hz_align_j = 8;
1031 int hz_width, hz_height;
1032
1033 if (!layout->hiz)
1034 return;
1035
1036 /*
1037 * See the Sandy Bridge PRM, volume 2 part 1, page 312, and the Ivy Bridge
1038 * PRM, volume 2 part 1, page 312-313.
1039 *
1040 * It seems HiZ buffer is aligned to 8x8, with every two rows packed into a
1041 * memory row.
1042 */
1043
1044 hz_width = align(layout->levels[0].w, 16);
1045
1046 if (templ->target == PIPE_TEXTURE_3D) {
1047 unsigned lv;
1048
1049 hz_height = 0;
1050
1051 for (lv = 0; lv <= templ->last_level; lv++) {
1052 const unsigned h = align(layout->levels[lv].h, hz_align_j);
1053 hz_height += h * layout->levels[lv].d;
1054 }
1055
1056 hz_height /= 2;
1057 }
1058 else {
1059 const unsigned h0 = align(layout->levels[0].h, hz_align_j);
1060 unsigned hz_qpitch = h0;
1061
1062 if (layout->array_spacing_full) {
1063 const unsigned h1 = align(layout->levels[1].h, hz_align_j);
1064 const unsigned htail =
1065 ((layout->dev->gen >= ILO_GEN(7)) ? 12 : 11) * hz_align_j;
1066
1067 hz_qpitch += h1 + htail;
1068 }
1069
1070 hz_height = hz_qpitch * templ->array_size / 2;
1071
1072 if (layout->dev->gen >= ILO_GEN(7))
1073 hz_height = align(hz_height, 8);
1074 }
1075
1076 /* align to Y-tile */
1077 layout->hiz_stride = align(hz_width, 128);
1078 layout->hiz_height = align(hz_height, 32);
1079 }
1080
1081 static void
1082 tex_free_slices(struct ilo_texture *tex)
1083 {
1084 FREE(tex->slices[0]);
1085 }
1086
1087 static bool
1088 tex_alloc_slices(struct ilo_texture *tex)
1089 {
1090 const struct pipe_resource *templ = &tex->base;
1091 struct ilo_texture_slice *slices;
1092 int depth, lv;
1093
1094 /* sum the depths of all levels */
1095 depth = 0;
1096 for (lv = 0; lv <= templ->last_level; lv++)
1097 depth += u_minify(templ->depth0, lv);
1098
1099 /*
1100 * There are (depth * tex->base.array_size) slices in total. Either depth
1101 * is one (non-3D) or templ->array_size is one (non-array), but it does
1102 * not matter.
1103 */
1104 slices = CALLOC(depth * templ->array_size, sizeof(*slices));
1105 if (!slices)
1106 return false;
1107
1108 tex->slices[0] = slices;
1109
1110 /* point to the respective positions in the buffer */
1111 for (lv = 1; lv <= templ->last_level; lv++) {
1112 tex->slices[lv] = tex->slices[lv - 1] +
1113 u_minify(templ->depth0, lv - 1) * templ->array_size;
1114 }
1115
1116 return true;
1117 }
1118
1119 static bool
1120 tex_import_handle(struct ilo_texture *tex,
1121 const struct tex_layout *layout,
1122 const struct winsys_handle *handle)
1123 {
1124 struct ilo_screen *is = ilo_screen(tex->base.screen);
1125 const char *name = resource_get_bo_name(&tex->base);
1126 enum intel_tiling_mode tiling;
1127 unsigned long pitch;
1128
1129 tex->bo = intel_winsys_import_handle(is->winsys, name, handle,
1130 tex->bo_height, &tiling, &pitch);
1131 if (!tex->bo)
1132 return false;
1133
1134 if (!(layout->valid_tilings & (1 << tiling))) {
1135 ilo_err("imported handle has incompatible tiling\n");
1136 intel_bo_unreference(tex->bo);
1137 tex->bo = NULL;
1138 return false;
1139 }
1140
1141 tex->tiling = tiling;
1142 tex->bo_stride = pitch;
1143
1144 return true;
1145 }
1146
1147 static bool
1148 tex_create_bo(struct ilo_texture *tex)
1149 {
1150 struct ilo_screen *is = ilo_screen(tex->base.screen);
1151 const char *name = resource_get_bo_name(&tex->base);
1152 const enum intel_domain_flag initial_domain =
1153 resource_get_bo_initial_domain(&tex->base);
1154
1155 tex->bo = intel_winsys_alloc_bo(is->winsys, name, tex->tiling,
1156 tex->bo_stride, tex->bo_height, initial_domain);
1157
1158 return (tex->bo != NULL);
1159 }
1160
1161 static bool
1162 tex_create_separate_stencil(struct ilo_texture *tex)
1163 {
1164 struct pipe_resource templ = tex->base;
1165 struct pipe_resource *s8;
1166
1167 /*
1168 * Unless PIPE_BIND_DEPTH_STENCIL is set, the resource may have other
1169 * tilings. But that should be fine since it will never be bound as the
1170 * stencil buffer, and our transfer code can handle all tilings.
1171 */
1172 templ.format = PIPE_FORMAT_S8_UINT;
1173
1174 s8 = tex->base.screen->resource_create(tex->base.screen, &templ);
1175 if (!s8)
1176 return false;
1177
1178 tex->separate_s8 = ilo_texture(s8);
1179
1180 assert(tex->separate_s8->bo_format == PIPE_FORMAT_S8_UINT);
1181
1182 return true;
1183 }
1184
1185 static bool
1186 tex_create_hiz(struct ilo_texture *tex, const struct tex_layout *layout)
1187 {
1188 struct ilo_screen *is = ilo_screen(tex->base.screen);
1189 const struct pipe_resource *templ = layout->templ;
1190 unsigned lv;
1191
1192 tex->hiz.bo = intel_winsys_alloc_bo(is->winsys, "hiz texture",
1193 INTEL_TILING_Y, layout->hiz_stride, layout->hiz_height,
1194 INTEL_DOMAIN_RENDER);
1195 if (!tex->hiz.bo)
1196 return false;
1197
1198 tex->hiz.bo_stride = layout->hiz_stride;
1199
1200 /*
1201 * From the Sandy Bridge PRM, volume 2 part 1, page 313-314:
1202 *
1203 * "A rectangle primitive representing the clear area is delivered. The
1204 * primitive must adhere to the following restrictions on size:
1205 *
1206 * - If Number of Multisamples is NUMSAMPLES_1, the rectangle must be
1207 * aligned to an 8x4 pixel block relative to the upper left corner
1208 * of the depth buffer, and contain an integer number of these pixel
1209 * blocks, and all 8x4 pixels must be lit.
1210 *
1211 * - If Number of Multisamples is NUMSAMPLES_4, the rectangle must be
1212 * aligned to a 4x2 pixel block (8x4 sample block) relative to the
1213 * upper left corner of the depth buffer, and contain an integer
1214 * number of these pixel blocks, and all samples of the 4x2 pixels
1215 * must be lit
1216 *
1217 * - If Number of Multisamples is NUMSAMPLES_8, the rectangle must be
1218 * aligned to a 2x2 pixel block (8x4 sample block) relative to the
1219 * upper left corner of the depth buffer, and contain an integer
1220 * number of these pixel blocks, and all samples of the 2x2 pixels
1221 * must be list."
1222 *
1223 * "The following is required when performing a depth buffer resolve:
1224 *
1225 * - A rectangle primitive of the same size as the previous depth
1226 * buffer clear operation must be delivered, and depth buffer state
1227 * cannot have changed since the previous depth buffer clear
1228 * operation."
1229 *
1230 * Experiments on Haswell show that depth buffer resolves have the same
1231 * alignment requirements, and aligning the RECTLIST primitive and
1232 * 3DSTATE_DRAWING_RECTANGLE alone are not enough. The mipmap size must be
1233 * aligned.
1234 */
1235 for (lv = 0; lv <= templ->last_level; lv++) {
1236 unsigned align_w = 8, align_h = 4;
1237 unsigned flags = 0;
1238
1239 switch (templ->nr_samples) {
1240 case 0:
1241 case 1:
1242 break;
1243 case 2:
1244 align_w /= 2;
1245 break;
1246 case 4:
1247 align_w /= 2;
1248 align_h /= 2;
1249 break;
1250 case 8:
1251 default:
1252 align_w /= 4;
1253 align_h /= 2;
1254 break;
1255 }
1256
1257 if (u_minify(templ->width0, lv) % align_w == 0 &&
1258 u_minify(templ->height0, lv) % align_h == 0) {
1259 flags |= ILO_TEXTURE_HIZ;
1260
1261 /* this will trigger a HiZ resolve */
1262 if (tex->imported)
1263 flags |= ILO_TEXTURE_CPU_WRITE;
1264 }
1265
1266 if (flags) {
1267 const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ?
1268 u_minify(templ->depth0, lv) : templ->array_size;
1269 ilo_texture_set_slice_flags(tex, lv, 0, num_slices, flags, flags);
1270 }
1271 }
1272
1273 return true;
1274 }
1275
1276 static bool
1277 tex_apply_layout(struct ilo_texture *tex,
1278 const struct tex_layout *layout,
1279 const struct winsys_handle *handle)
1280 {
1281 tex->bo_format = layout->format;
1282 tex->block_width = layout->block_width;
1283 tex->block_height = layout->block_height;
1284 tex->block_size = layout->block_size;
1285
1286 tex->tiling = layout->tiling;
1287 tex->bo_stride = layout->bo_stride;
1288 tex->bo_height = layout->bo_height;
1289
1290 tex->halign_8 = (layout->align_i == 8);
1291 tex->valign_4 = (layout->align_j == 4);
1292 tex->array_spacing_full = layout->array_spacing_full;
1293 tex->interleaved = layout->interleaved;
1294
1295 if (handle) {
1296 if (!tex_import_handle(tex, layout, handle))
1297 return false;
1298 }
1299 else {
1300 if (!tex_create_bo(tex))
1301 return false;
1302 }
1303
1304 /* allocate separate stencil resource */
1305 if (layout->separate_stencil && !tex_create_separate_stencil(tex))
1306 return false;
1307
1308 if (layout->hiz && !tex_create_hiz(tex, layout)) {
1309 /* Separate Stencil Buffer requires HiZ to be enabled */
1310 if (layout->dev->gen == ILO_GEN(6) && layout->separate_stencil)
1311 return false;
1312 }
1313
1314 return true;
1315 }
1316
1317 /**
1318 * The texutre is for transfer only. We can define our own layout to save
1319 * space.
1320 */
1321 static bool
1322 tex_apply_transfer_layout(struct ilo_texture *tex)
1323 {
1324 const struct pipe_resource *templ = &tex->base;
1325 const unsigned num_slices = (templ->target == PIPE_TEXTURE_3D) ?
1326 templ->depth0 : templ->array_size;
1327 unsigned slice_width, slice_height, i;
1328
1329 assert(templ->last_level == 0);
1330
1331 tex->bo_format = templ->format;
1332 tex->block_width = util_format_get_blockwidth(templ->format);
1333 tex->block_height = util_format_get_blockheight(templ->format);
1334 tex->block_size = util_format_get_blocksize(templ->format);
1335
1336 assert(util_is_power_of_two(tex->block_width) &&
1337 util_is_power_of_two(tex->block_height));
1338
1339 /* use packed layout */
1340 slice_width = align(templ->width0, tex->block_width);
1341 slice_height = align(templ->height0, tex->block_height);
1342 for (i = 0; i < num_slices; i++) {
1343 tex->slices[0][i].x = 0;
1344 tex->slices[0][i].y = slice_height * i;
1345 }
1346
1347 tex->tiling = INTEL_TILING_NONE;
1348 tex->bo_stride = (slice_width / tex->block_width) * tex->block_size;
1349 tex->bo_stride = align(tex->bo_stride, 64);
1350 tex->bo_height = (slice_height / tex->block_height) * num_slices;
1351
1352 return tex_create_bo(tex);
1353 }
1354
1355 static void
1356 tex_destroy(struct ilo_texture *tex)
1357 {
1358 if (tex->hiz.bo)
1359 intel_bo_unreference(tex->hiz.bo);
1360
1361 if (tex->separate_s8)
1362 tex_destroy(tex->separate_s8);
1363
1364 if (tex->bo)
1365 intel_bo_unreference(tex->bo);
1366
1367 tex_free_slices(tex);
1368 FREE(tex);
1369 }
1370
1371 static struct pipe_resource *
1372 tex_create(struct pipe_screen *screen,
1373 const struct pipe_resource *templ,
1374 const struct winsys_handle *handle)
1375 {
1376 struct tex_layout layout;
1377 struct ilo_texture *tex;
1378 bool transfer_only;
1379
1380 tex = CALLOC_STRUCT(ilo_texture);
1381 if (!tex)
1382 return NULL;
1383
1384 tex->base = *templ;
1385 tex->base.screen = screen;
1386 pipe_reference_init(&tex->base.reference, 1);
1387
1388 if (!tex_alloc_slices(tex)) {
1389 FREE(tex);
1390 return NULL;
1391 }
1392
1393 tex->imported = (handle != NULL);
1394
1395 /* use transfer layout when the texture is never bound to GPU */
1396 transfer_only = !(templ->bind & ~(PIPE_BIND_TRANSFER_WRITE |
1397 PIPE_BIND_TRANSFER_READ));
1398 if (transfer_only && templ->last_level == 0) {
1399 if (!tex_apply_transfer_layout(tex)) {
1400 tex_destroy(tex);
1401 return NULL;
1402 }
1403
1404 return &tex->base;
1405 }
1406
1407 if (!tex_layout_init(&layout, screen, templ, tex->slices)) {
1408 tex_destroy(tex);
1409 return NULL;
1410 }
1411
1412 switch (templ->target) {
1413 case PIPE_TEXTURE_1D:
1414 case PIPE_TEXTURE_2D:
1415 case PIPE_TEXTURE_CUBE:
1416 case PIPE_TEXTURE_RECT:
1417 case PIPE_TEXTURE_1D_ARRAY:
1418 case PIPE_TEXTURE_2D_ARRAY:
1419 case PIPE_TEXTURE_CUBE_ARRAY:
1420 tex_layout_2d(&layout);
1421 break;
1422 case PIPE_TEXTURE_3D:
1423 tex_layout_3d(&layout);
1424 break;
1425 default:
1426 assert(!"unknown resource target");
1427 break;
1428 }
1429
1430 if (!tex_layout_calculate_bo_size(&layout)) {
1431 tex_destroy(tex);
1432 return NULL;
1433 }
1434
1435 tex_layout_calculate_hiz_size(&layout);
1436
1437 if (!tex_apply_layout(tex, &layout, handle)) {
1438 tex_destroy(tex);
1439 return NULL;
1440 }
1441
1442 return &tex->base;
1443 }
1444
1445 static bool
1446 tex_get_handle(struct ilo_texture *tex, struct winsys_handle *handle)
1447 {
1448 struct ilo_screen *is = ilo_screen(tex->base.screen);
1449 int err;
1450
1451 err = intel_winsys_export_handle(is->winsys, tex->bo,
1452 tex->tiling, tex->bo_stride, tex->bo_height, handle);
1453
1454 return !err;
1455 }
1456
1457 static bool
1458 buf_create_bo(struct ilo_buffer *buf)
1459 {
1460 struct ilo_screen *is = ilo_screen(buf->base.screen);
1461 const char *name = resource_get_bo_name(&buf->base);
1462 const enum intel_domain_flag initial_domain =
1463 resource_get_bo_initial_domain(&buf->base);
1464
1465 buf->bo = intel_winsys_alloc_buffer(is->winsys, name,
1466 buf->bo_size, initial_domain);
1467
1468 return (buf->bo != NULL);
1469 }
1470
1471 static void
1472 buf_destroy(struct ilo_buffer *buf)
1473 {
1474 intel_bo_unreference(buf->bo);
1475 FREE(buf);
1476 }
1477
1478 static struct pipe_resource *
1479 buf_create(struct pipe_screen *screen, const struct pipe_resource *templ)
1480 {
1481 const struct ilo_screen *is = ilo_screen(screen);
1482 struct ilo_buffer *buf;
1483
1484 buf = CALLOC_STRUCT(ilo_buffer);
1485 if (!buf)
1486 return NULL;
1487
1488 buf->base = *templ;
1489 buf->base.screen = screen;
1490 pipe_reference_init(&buf->base.reference, 1);
1491
1492 buf->bo_size = templ->width0;
1493
1494 /*
1495 * From the Sandy Bridge PRM, volume 1 part 1, page 118:
1496 *
1497 * "For buffers, which have no inherent "height," padding requirements
1498 * are different. A buffer must be padded to the next multiple of 256
1499 * array elements, with an additional 16 bytes added beyond that to
1500 * account for the L1 cache line."
1501 */
1502 if (templ->bind & PIPE_BIND_SAMPLER_VIEW)
1503 buf->bo_size = align(buf->bo_size, 256) + 16;
1504
1505 if ((templ->bind & PIPE_BIND_VERTEX_BUFFER) &&
1506 is->dev.gen < ILO_GEN(7.5)) {
1507 /*
1508 * As noted in ilo_translate_format(), we treat some 3-component formats
1509 * as 4-component formats to work around hardware limitations. Imagine
1510 * the case where the vertex buffer holds a single
1511 * PIPE_FORMAT_R16G16B16_FLOAT vertex, and buf->bo_size is 6. The
1512 * hardware would fail to fetch it at boundary check because the vertex
1513 * buffer is expected to hold a PIPE_FORMAT_R16G16B16A16_FLOAT vertex
1514 * and that takes at least 8 bytes.
1515 *
1516 * For the workaround to work, we should add 2 to the bo size. But that
1517 * would waste a page when the bo size is already page aligned. Let's
1518 * round it to page size for now and revisit this when needed.
1519 */
1520 buf->bo_size = align(buf->bo_size, 4096);
1521 }
1522
1523 if (buf->bo_size < templ->width0 ||
1524 buf->bo_size > max_resource_size ||
1525 !buf_create_bo(buf)) {
1526 FREE(buf);
1527 return NULL;
1528 }
1529
1530 return &buf->base;
1531 }
1532
1533 static boolean
1534 ilo_can_create_resource(struct pipe_screen *screen,
1535 const struct pipe_resource *templ)
1536 {
1537 struct tex_layout layout;
1538
1539 if (templ->target == PIPE_BUFFER)
1540 return (templ->width0 <= max_resource_size);
1541
1542 tex_layout_init(&layout, screen, templ, NULL);
1543
1544 switch (templ->target) {
1545 case PIPE_TEXTURE_3D:
1546 tex_layout_3d(&layout);
1547 break;
1548 default:
1549 tex_layout_2d(&layout);
1550 break;
1551 }
1552
1553 return tex_layout_calculate_bo_size(&layout);
1554 }
1555
1556 static struct pipe_resource *
1557 ilo_resource_create(struct pipe_screen *screen,
1558 const struct pipe_resource *templ)
1559 {
1560 if (templ->target == PIPE_BUFFER)
1561 return buf_create(screen, templ);
1562 else
1563 return tex_create(screen, templ, NULL);
1564 }
1565
1566 static struct pipe_resource *
1567 ilo_resource_from_handle(struct pipe_screen *screen,
1568 const struct pipe_resource *templ,
1569 struct winsys_handle *handle)
1570 {
1571 if (templ->target == PIPE_BUFFER)
1572 return NULL;
1573 else
1574 return tex_create(screen, templ, handle);
1575 }
1576
1577 static boolean
1578 ilo_resource_get_handle(struct pipe_screen *screen,
1579 struct pipe_resource *res,
1580 struct winsys_handle *handle)
1581 {
1582 if (res->target == PIPE_BUFFER)
1583 return false;
1584 else
1585 return tex_get_handle(ilo_texture(res), handle);
1586
1587 }
1588
1589 static void
1590 ilo_resource_destroy(struct pipe_screen *screen,
1591 struct pipe_resource *res)
1592 {
1593 if (res->target == PIPE_BUFFER)
1594 buf_destroy(ilo_buffer(res));
1595 else
1596 tex_destroy(ilo_texture(res));
1597 }
1598
1599 /**
1600 * Initialize resource-related functions.
1601 */
1602 void
1603 ilo_init_resource_functions(struct ilo_screen *is)
1604 {
1605 is->base.can_create_resource = ilo_can_create_resource;
1606 is->base.resource_create = ilo_resource_create;
1607 is->base.resource_from_handle = ilo_resource_from_handle;
1608 is->base.resource_get_handle = ilo_resource_get_handle;
1609 is->base.resource_destroy = ilo_resource_destroy;
1610 }
1611
1612 bool
1613 ilo_buffer_rename_bo(struct ilo_buffer *buf)
1614 {
1615 struct intel_bo *old_bo = buf->bo;
1616
1617 if (buf_create_bo(buf)) {
1618 intel_bo_unreference(old_bo);
1619 return true;
1620 }
1621 else {
1622 buf->bo = old_bo;
1623 return false;
1624 }
1625 }
1626
1627 bool
1628 ilo_texture_rename_bo(struct ilo_texture *tex)
1629 {
1630 struct intel_bo *old_bo = tex->bo;
1631
1632 /* an imported texture cannot be renamed */
1633 if (tex->imported)
1634 return false;
1635
1636 if (tex_create_bo(tex)) {
1637 intel_bo_unreference(old_bo);
1638 return true;
1639 }
1640 else {
1641 tex->bo = old_bo;
1642 return false;
1643 }
1644 }
1645
1646 /**
1647 * Return the offset (in bytes) to a slice within the bo.
1648 *
1649 * The returned offset is aligned to tile size. Since slices are not
1650 * guaranteed to start at tile boundaries, the X and Y offsets (in pixels)
1651 * from the tile origin to the slice are also returned. X offset is always a
1652 * multiple of 4 and Y offset is always a multiple of 2.
1653 */
1654 unsigned
1655 ilo_texture_get_slice_offset(const struct ilo_texture *tex,
1656 unsigned level, unsigned slice,
1657 unsigned *x_offset, unsigned *y_offset)
1658 {
1659 const struct ilo_texture_slice *s =
1660 ilo_texture_get_slice(tex, level, slice);
1661 unsigned tile_w, tile_h, tile_size, row_size;
1662 unsigned x, y, slice_offset;
1663
1664 /* see the Sandy Bridge PRM, volume 1 part 2, page 24 */
1665
1666 switch (tex->tiling) {
1667 case INTEL_TILING_NONE:
1668 /* W-tiled */
1669 if (tex->bo_format == PIPE_FORMAT_S8_UINT) {
1670 tile_w = 64;
1671 tile_h = 64;
1672 }
1673 else {
1674 tile_w = 1;
1675 tile_h = 1;
1676 }
1677 break;
1678 case INTEL_TILING_X:
1679 tile_w = 512;
1680 tile_h = 8;
1681 break;
1682 case INTEL_TILING_Y:
1683 tile_w = 128;
1684 tile_h = 32;
1685 break;
1686 default:
1687 assert(!"unknown tiling");
1688 tile_w = 1;
1689 tile_h = 1;
1690 break;
1691 }
1692
1693 tile_size = tile_w * tile_h;
1694 row_size = tex->bo_stride * tile_h;
1695
1696 /* in bytes */
1697 x = s->x / tex->block_width * tex->block_size;
1698 y = s->y / tex->block_height;
1699 slice_offset = row_size * (y / tile_h) + tile_size * (x / tile_w);
1700
1701 /*
1702 * Since tex->bo_stride is a multiple of tile_w, slice_offset should be
1703 * aligned at this point.
1704 */
1705 assert(slice_offset % tile_size == 0);
1706
1707 /*
1708 * because of the possible values of align_i and align_j in
1709 * tex_layout_init_alignments(), x_offset is guaranteed to be a multiple of
1710 * 4 and y_offset is guaranteed to be a multiple of 2.
1711 */
1712 if (x_offset) {
1713 /* in pixels */
1714 x = (x % tile_w) / tex->block_size * tex->block_width;
1715 assert(x % 4 == 0);
1716
1717 *x_offset = x;
1718 }
1719
1720 if (y_offset) {
1721 /* in pixels */
1722 y = (y % tile_h) * tex->block_height;
1723 assert(y % 2 == 0);
1724
1725 *y_offset = y;
1726 }
1727
1728 return slice_offset;
1729 }