72b02a2cf0a0d261bc65b1fb1fe9d4f017e7a088
[mesa.git] / src / mesa / drivers / dri / i965 / brw_tex_layout.c
1 /*
2 * Copyright 2006 VMware, Inc.
3 * Copyright © 2006 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file brw_tex_layout.c
28 *
29 * Code to lay out images in a mipmap tree.
30 *
31 * \author Keith Whitwell <keithw@vmware.com>
32 * \author Michel Dänzer <daenzer@vmware.com>
33 */
34
35 #include "intel_mipmap_tree.h"
36 #include "brw_context.h"
37 #include "main/macros.h"
38 #include "main/glformats.h"
39
40 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
41
42 static unsigned int
43 intel_horizontal_texture_alignment_unit(struct brw_context *brw,
44 struct intel_mipmap_tree *mt)
45 {
46 /**
47 * From the "Alignment Unit Size" section of various specs, namely:
48 * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
49 * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
50 * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
51 * - BSpec (for Ivybridge and slight variations in separate stencil)
52 *
53 * +----------------------------------------------------------------------+
54 * | | alignment unit width ("i") |
55 * | Surface Property |-----------------------------|
56 * | | 915 | 965 | ILK | SNB | IVB |
57 * +----------------------------------------------------------------------+
58 * | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 |
59 * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
60 * | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 |
61 * | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 |
62 * | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 |
63 * | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 |
64 * | All Others | 4 | 4 | 4 | 4 | 4 |
65 * +----------------------------------------------------------------------+
66 *
67 * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
68 * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
69 */
70 if (_mesa_is_format_compressed(mt->format)) {
71 /* The hardware alignment requirements for compressed textures
72 * happen to match the block boundaries.
73 */
74 unsigned int i, j;
75 _mesa_get_format_block_size(mt->format, &i, &j);
76
77 /* On Gen9+ we can pick our own alignment for compressed textures but it
78 * has to be a multiple of the block size. The minimum alignment we can
79 * pick is 4 so we effectively have to align to 4 times the block
80 * size
81 */
82 if (brw->gen >= 9)
83 return i * 4;
84 else
85 return i;
86 }
87
88 if (mt->format == MESA_FORMAT_S_UINT8)
89 return 8;
90
91 if (brw->gen >= 7 && mt->format == MESA_FORMAT_Z_UNORM16)
92 return 8;
93
94 if (brw->gen == 8 && mt->mcs_mt && mt->num_samples <= 1)
95 return 16;
96
97 return 4;
98 }
99
100 static unsigned int
101 intel_vertical_texture_alignment_unit(struct brw_context *brw,
102 mesa_format format, bool multisampled)
103 {
104 /**
105 * From the "Alignment Unit Size" section of various specs, namely:
106 * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
107 * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
108 * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
109 * - BSpec (for Ivybridge and slight variations in separate stencil)
110 *
111 * +----------------------------------------------------------------------+
112 * | | alignment unit height ("j") |
113 * | Surface Property |-----------------------------|
114 * | | 915 | 965 | ILK | SNB | IVB |
115 * +----------------------------------------------------------------------+
116 * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
117 * | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 |
118 * | Depth Buffer | 2 | 2 | 2 | 4 | 4 |
119 * | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 |
120 * | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 |
121 * | All Others | 2 | 2 | 2 | * | * |
122 * +----------------------------------------------------------------------+
123 *
124 * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
125 * the SURFACE_STATE "Surface Vertical Alignment" field.
126 */
127 if (_mesa_is_format_compressed(format))
128 /* See comment above for the horizontal alignment */
129 return brw->gen >= 9 ? 16 : 4;
130
131 if (format == MESA_FORMAT_S_UINT8)
132 return brw->gen >= 7 ? 8 : 4;
133
134 /* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4
135 * should always be used, except for stencil buffers, which should be 8.
136 */
137 if (brw->gen >= 8)
138 return 4;
139
140 if (multisampled)
141 return 4;
142
143 GLenum base_format = _mesa_get_format_base_format(format);
144
145 if (brw->gen >= 6 &&
146 (base_format == GL_DEPTH_COMPONENT ||
147 base_format == GL_DEPTH_STENCIL)) {
148 return 4;
149 }
150
151 if (brw->gen == 7) {
152 /* On Gen7, we prefer a vertical alignment of 4 when possible, because
153 * that allows Y tiled render targets.
154 *
155 * From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
156 * messages), on p64, under the heading "Surface Vertical Alignment":
157 *
158 * Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
159 * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
160 * (0x190)
161 *
162 * VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
163 */
164 if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32)
165 return 2;
166
167 return 4;
168 }
169
170 return 2;
171 }
172
173 static void
174 gen9_miptree_layout_1d(struct intel_mipmap_tree *mt)
175 {
176 unsigned x = 0;
177 unsigned width = mt->physical_width0;
178 unsigned depth = mt->physical_depth0; /* number of array layers. */
179
180 /* When this layout is used the horizontal alignment is fixed at 64 and the
181 * hardware ignores the value given in the surface state
182 */
183 const unsigned int align_w = 64;
184
185 mt->total_height = mt->physical_height0;
186 mt->total_width = 0;
187
188 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
189 unsigned img_width;
190
191 intel_miptree_set_level_info(mt, level, x, 0, depth);
192
193 img_width = ALIGN(width, align_w);
194
195 mt->total_width = MAX2(mt->total_width, x + img_width);
196
197 x += img_width;
198
199 width = minify(width, 1);
200 }
201 }
202
203 static void
204 brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
205 {
206 unsigned x = 0;
207 unsigned y = 0;
208 unsigned width = mt->physical_width0;
209 unsigned height = mt->physical_height0;
210 unsigned depth = mt->physical_depth0; /* number of array layers. */
211 unsigned int bw, bh;
212
213 _mesa_get_format_block_size(mt->format, &bw, &bh);
214
215 mt->total_width = mt->physical_width0;
216
217 if (mt->compressed) {
218 mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
219 }
220
221 /* May need to adjust width to accommodate the placement of
222 * the 2nd mipmap. This occurs when the alignment
223 * constraints of mipmap placement push the right edge of the
224 * 2nd mipmap out past the width of its parent.
225 */
226 if (mt->first_level != mt->last_level) {
227 unsigned mip1_width;
228
229 if (mt->compressed) {
230 mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
231 ALIGN(minify(mt->physical_width0, 2), bw);
232 } else {
233 mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
234 minify(mt->physical_width0, 2);
235 }
236
237 if (mip1_width > mt->total_width) {
238 mt->total_width = mip1_width;
239 }
240 }
241
242 mt->total_height = 0;
243
244 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
245 unsigned img_height;
246
247 intel_miptree_set_level_info(mt, level, x, y, depth);
248
249 img_height = ALIGN(height, mt->align_h);
250 if (mt->compressed)
251 img_height /= bh;
252
253 if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
254 /* Compact arrays with separated miplevels */
255 img_height *= depth;
256 }
257
258 /* Because the images are packed better, the final offset
259 * might not be the maximal one:
260 */
261 mt->total_height = MAX2(mt->total_height, y + img_height);
262
263 /* Layout_below: step right after second mipmap.
264 */
265 if (level == mt->first_level + 1) {
266 x += ALIGN(width, mt->align_w);
267 } else {
268 y += img_height;
269 }
270
271 width = minify(width, 1);
272 height = minify(height, 1);
273
274 if (mt->target == GL_TEXTURE_3D)
275 depth = minify(depth, 1);
276 }
277 }
278
279 unsigned
280 brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw,
281 const struct intel_mipmap_tree *mt,
282 unsigned level)
283 {
284 assert(brw->gen < 9);
285
286 if (mt->target == GL_TEXTURE_3D ||
287 (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
288 return ALIGN(minify(mt->physical_width0, level), mt->align_w);
289 } else {
290 return 0;
291 }
292 }
293
294 unsigned
295 brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw,
296 const struct intel_mipmap_tree *mt,
297 unsigned level)
298 {
299 if (brw->gen >= 9) {
300 /* ALL_SLICES_AT_EACH_LOD isn't supported on Gen8+ but this code will
301 * effectively end up with a packed qpitch anyway whenever
302 * mt->first_level == mt->last_level.
303 */
304 assert(mt->array_layout != ALL_SLICES_AT_EACH_LOD);
305
306 /* On Gen9 we can pick whatever qpitch we like as long as it's aligned
307 * to the vertical alignment so we don't need to add any extra rows.
308 */
309 unsigned qpitch = mt->total_height;
310
311 /* If the surface might be used as a stencil buffer or HiZ buffer then
312 * it needs to be a multiple of 8.
313 */
314 const GLenum base_format = _mesa_get_format_base_format(mt->format);
315 if (_mesa_is_depth_or_stencil_format(base_format))
316 qpitch = ALIGN(qpitch, 8);
317
318 /* 3D textures need to be aligned to the tile height. At this point we
319 * don't know which tiling will be used so let's just align it to 32
320 */
321 if (mt->target == GL_TEXTURE_3D)
322 qpitch = ALIGN(qpitch, 32);
323
324 return qpitch;
325
326 } else if (mt->target == GL_TEXTURE_3D ||
327 (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
328 mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
329 return ALIGN(minify(mt->physical_height0, level), mt->align_h);
330
331 } else {
332 const unsigned h0 = ALIGN(mt->physical_height0, mt->align_h);
333 const unsigned h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
334
335 return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
336 }
337 }
338
339 static void
340 align_cube(struct intel_mipmap_tree *mt)
341 {
342 /* The 965's sampler lays cachelines out according to how accesses
343 * in the texture surfaces run, so they may be "vertical" through
344 * memory. As a result, the docs say in Surface Padding Requirements:
345 * Sampling Engine Surfaces that two extra rows of padding are required.
346 */
347 if (mt->target == GL_TEXTURE_CUBE_MAP)
348 mt->total_height += 2;
349 }
350
351 static bool
352 use_linear_1d_layout(struct brw_context *brw,
353 struct intel_mipmap_tree *mt)
354 {
355 /* On Gen9+ the mipmap levels of a 1D surface are all laid out in a
356 * horizontal line. This isn't done for depth/stencil buffers however
357 * because those will be using a tiled layout
358 */
359 if (brw->gen >= 9 &&
360 (mt->target == GL_TEXTURE_1D ||
361 mt->target == GL_TEXTURE_1D_ARRAY)) {
362 GLenum base_format = _mesa_get_format_base_format(mt->format);
363
364 if (base_format != GL_DEPTH_COMPONENT &&
365 base_format != GL_DEPTH_STENCIL &&
366 base_format != GL_STENCIL_INDEX)
367 return true;
368 }
369
370 return false;
371 }
372
373 static void
374 brw_miptree_layout_texture_array(struct brw_context *brw,
375 struct intel_mipmap_tree *mt)
376 {
377 unsigned height = mt->physical_height0;
378 bool layout_1d = use_linear_1d_layout(brw, mt);
379 int physical_qpitch;
380
381 if (layout_1d)
382 gen9_miptree_layout_1d(mt);
383 else
384 brw_miptree_layout_2d(mt);
385
386 if (layout_1d) {
387 physical_qpitch = 1;
388 /* When using the horizontal layout the qpitch specifies the distance in
389 * pixels between array slices. The total_width is forced to be a
390 * multiple of the horizontal alignment in brw_miptree_layout_1d (in
391 * this case it's always 64). The vertical alignment is ignored.
392 */
393 mt->qpitch = mt->total_width;
394 } else {
395 mt->qpitch = brw_miptree_get_vertical_slice_pitch(brw, mt, 0);
396 /* Unlike previous generations the qpitch is a multiple of the
397 * compressed block size on Gen9 so physical_qpitch matches mt->qpitch.
398 */
399 physical_qpitch = (mt->compressed && brw->gen < 9 ? mt->qpitch / 4 :
400 mt->qpitch);
401 }
402
403 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
404 unsigned img_height;
405 img_height = ALIGN(height, mt->align_h);
406 if (mt->compressed)
407 img_height /= mt->align_h;
408
409 for (int q = 0; q < mt->level[level].depth; q++) {
410 if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
411 intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
412 } else {
413 intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
414 }
415 }
416 height = minify(height, 1);
417 }
418 if (mt->array_layout == ALL_LOD_IN_EACH_SLICE)
419 mt->total_height = physical_qpitch * mt->physical_depth0;
420
421 align_cube(mt);
422 }
423
424 static void
425 brw_miptree_layout_texture_3d(struct brw_context *brw,
426 struct intel_mipmap_tree *mt)
427 {
428 unsigned yscale = mt->compressed ? 4 : 1;
429
430 mt->total_width = 0;
431 mt->total_height = 0;
432
433 unsigned ysum = 0;
434 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
435 unsigned WL = MAX2(mt->physical_width0 >> level, 1);
436 unsigned HL = MAX2(mt->physical_height0 >> level, 1);
437 unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
438 unsigned wL = ALIGN(WL, mt->align_w);
439 unsigned hL = ALIGN(HL, mt->align_h);
440
441 if (mt->target == GL_TEXTURE_CUBE_MAP)
442 DL = 6;
443
444 intel_miptree_set_level_info(mt, level, 0, 0, DL);
445
446 for (unsigned q = 0; q < DL; q++) {
447 unsigned x = (q % (1 << level)) * wL;
448 unsigned y = ysum + (q >> level) * hL;
449
450 intel_miptree_set_image_offset(mt, level, q, x, y / yscale);
451 mt->total_width = MAX2(mt->total_width, x + wL);
452 mt->total_height = MAX2(mt->total_height, (y + hL) / yscale);
453 }
454
455 ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
456 }
457
458 align_cube(mt);
459 }
460
461 void
462 brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
463 {
464 bool multisampled = mt->num_samples > 1;
465 bool gen6_hiz_or_stencil = false;
466
467 if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
468 const GLenum base_format = _mesa_get_format_base_format(mt->format);
469 gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
470 }
471
472 if (gen6_hiz_or_stencil) {
473 /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
474 * hardware doesn't support multiple mip levels on stencil/hiz.
475 *
476 * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
477 * "The hierarchical depth buffer does not support the LOD field"
478 *
479 * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
480 * "The stencil depth buffer does not support the LOD field"
481 */
482 if (mt->format == MESA_FORMAT_S_UINT8) {
483 /* Stencil uses W tiling, so we force W tiling alignment for the
484 * ALL_SLICES_AT_EACH_LOD miptree layout.
485 */
486 mt->align_w = 64;
487 mt->align_h = 64;
488 } else {
489 /* Depth uses Y tiling, so we force need Y tiling alignment for the
490 * ALL_SLICES_AT_EACH_LOD miptree layout.
491 */
492 mt->align_w = 128 / mt->cpp;
493 mt->align_h = 32;
494 }
495 } else {
496 mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt);
497 mt->align_h =
498 intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
499 }
500
501 switch (mt->target) {
502 case GL_TEXTURE_CUBE_MAP:
503 if (brw->gen == 4) {
504 /* Gen4 stores cube maps as 3D textures. */
505 assert(mt->physical_depth0 == 6);
506 brw_miptree_layout_texture_3d(brw, mt);
507 } else {
508 /* All other hardware stores cube maps as 2D arrays. */
509 brw_miptree_layout_texture_array(brw, mt);
510 }
511 break;
512
513 case GL_TEXTURE_3D:
514 if (brw->gen >= 9)
515 brw_miptree_layout_texture_array(brw, mt);
516 else
517 brw_miptree_layout_texture_3d(brw, mt);
518 break;
519
520 case GL_TEXTURE_1D_ARRAY:
521 case GL_TEXTURE_2D_ARRAY:
522 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
523 case GL_TEXTURE_CUBE_MAP_ARRAY:
524 brw_miptree_layout_texture_array(brw, mt);
525 break;
526
527 default:
528 switch (mt->msaa_layout) {
529 case INTEL_MSAA_LAYOUT_UMS:
530 case INTEL_MSAA_LAYOUT_CMS:
531 brw_miptree_layout_texture_array(brw, mt);
532 break;
533 case INTEL_MSAA_LAYOUT_NONE:
534 case INTEL_MSAA_LAYOUT_IMS:
535 if (use_linear_1d_layout(brw, mt))
536 gen9_miptree_layout_1d(mt);
537 else
538 brw_miptree_layout_2d(mt);
539 break;
540 }
541 break;
542 }
543 DBG("%s: %dx%dx%d\n", __func__,
544 mt->total_width, mt->total_height, mt->cpp);
545
546 /* On Gen9+ the alignment values are expressed in multiples of the block
547 * size
548 */
549 if (brw->gen >= 9) {
550 unsigned int i, j;
551 _mesa_get_format_block_size(mt->format, &i, &j);
552 mt->align_w /= i;
553 mt->align_h /= j;
554 }
555 }
556