i965/gen6: Force tile alignment for each stencil/hiz LOD
[mesa.git] / src / mesa / drivers / dri / i965 / brw_tex_layout.c
1 /*
2 * Copyright 2006 VMware, Inc.
3 * Copyright © 2006 Intel Corporation
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files (the
7 * "Software"), to deal in the Software without restriction, including
8 * without limitation the rights to use, copy, modify, merge, publish,
9 * distribute, sublicense, and/or sell copies of the Software, and to
10 * permit persons to whom the Software is furnished to do so, subject to
11 * the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the
14 * next paragraph) shall be included in all copies or substantial
15 * portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
21 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
22 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
23 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file brw_tex_layout.c
28 *
29 * Code to lay out images in a mipmap tree.
30 *
31 * \author Keith Whitwell <keithw@vmware.com>
32 * \author Michel Dänzer <daenzer@vmware.com>
33 */
34
35 #include "intel_mipmap_tree.h"
36 #include "brw_context.h"
37 #include "main/macros.h"
38 #include "main/glformats.h"
39
40 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
41
42 static unsigned int
43 intel_horizontal_texture_alignment_unit(struct brw_context *brw,
44 mesa_format format)
45 {
46 /**
47 * From the "Alignment Unit Size" section of various specs, namely:
48 * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
49 * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
50 * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
51 * - BSpec (for Ivybridge and slight variations in separate stencil)
52 *
53 * +----------------------------------------------------------------------+
54 * | | alignment unit width ("i") |
55 * | Surface Property |-----------------------------|
56 * | | 915 | 965 | ILK | SNB | IVB |
57 * +----------------------------------------------------------------------+
58 * | YUV 4:2:2 format | 8 | 4 | 4 | 4 | 4 |
59 * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
60 * | FXT1 compressed format | 8 | 8 | 8 | 8 | 8 |
61 * | Depth Buffer (16-bit) | 4 | 4 | 4 | 4 | 8 |
62 * | Depth Buffer (other) | 4 | 4 | 4 | 4 | 4 |
63 * | Separate Stencil Buffer | N/A | N/A | 8 | 8 | 8 |
64 * | All Others | 4 | 4 | 4 | 4 | 4 |
65 * +----------------------------------------------------------------------+
66 *
67 * On IVB+, non-special cases can be overridden by setting the SURFACE_STATE
68 * "Surface Horizontal Alignment" field to HALIGN_4 or HALIGN_8.
69 */
70 if (_mesa_is_format_compressed(format)) {
71 /* The hardware alignment requirements for compressed textures
72 * happen to match the block boundaries.
73 */
74 unsigned int i, j;
75 _mesa_get_format_block_size(format, &i, &j);
76 return i;
77 }
78
79 if (format == MESA_FORMAT_S_UINT8)
80 return 8;
81
82 if (brw->gen >= 7 && format == MESA_FORMAT_Z_UNORM16)
83 return 8;
84
85 return 4;
86 }
87
88 static unsigned int
89 intel_vertical_texture_alignment_unit(struct brw_context *brw,
90 mesa_format format, bool multisampled)
91 {
92 /**
93 * From the "Alignment Unit Size" section of various specs, namely:
94 * - Gen3 Spec: "Memory Data Formats" Volume, Section 1.20.1.4
95 * - i965 and G45 PRMs: Volume 1, Section 6.17.3.4.
96 * - Ironlake and Sandybridge PRMs: Volume 1, Part 1, Section 7.18.3.4
97 * - BSpec (for Ivybridge and slight variations in separate stencil)
98 *
99 * +----------------------------------------------------------------------+
100 * | | alignment unit height ("j") |
101 * | Surface Property |-----------------------------|
102 * | | 915 | 965 | ILK | SNB | IVB |
103 * +----------------------------------------------------------------------+
104 * | BC1-5 compressed format (DXTn/S3TC) | 4 | 4 | 4 | 4 | 4 |
105 * | FXT1 compressed format | 4 | 4 | 4 | 4 | 4 |
106 * | Depth Buffer | 2 | 2 | 2 | 4 | 4 |
107 * | Separate Stencil Buffer | N/A | N/A | N/A | 4 | 8 |
108 * | Multisampled (4x or 8x) render target | N/A | N/A | N/A | 4 | 4 |
109 * | All Others | 2 | 2 | 2 | * | * |
110 * +----------------------------------------------------------------------+
111 *
112 * Where "*" means either VALIGN_2 or VALIGN_4 depending on the setting of
113 * the SURFACE_STATE "Surface Vertical Alignment" field.
114 */
115 if (_mesa_is_format_compressed(format))
116 return 4;
117
118 if (format == MESA_FORMAT_S_UINT8)
119 return brw->gen >= 7 ? 8 : 4;
120
121 /* Broadwell only supports VALIGN of 4, 8, and 16. The BSpec says 4
122 * should always be used, except for stencil buffers, which should be 8.
123 */
124 if (brw->gen >= 8)
125 return 4;
126
127 if (multisampled)
128 return 4;
129
130 GLenum base_format = _mesa_get_format_base_format(format);
131
132 if (brw->gen >= 6 &&
133 (base_format == GL_DEPTH_COMPONENT ||
134 base_format == GL_DEPTH_STENCIL)) {
135 return 4;
136 }
137
138 if (brw->gen == 7) {
139 /* On Gen7, we prefer a vertical alignment of 4 when possible, because
140 * that allows Y tiled render targets.
141 *
142 * From the Ivy Bridge PRM, Vol4 Part1 2.12.2.1 (SURFACE_STATE for most
143 * messages), on p64, under the heading "Surface Vertical Alignment":
144 *
145 * Value of 1 [VALIGN_4] is not supported for format YCRCB_NORMAL
146 * (0x182), YCRCB_SWAPUVY (0x183), YCRCB_SWAPUV (0x18f), YCRCB_SWAPY
147 * (0x190)
148 *
149 * VALIGN_4 is not supported for surface format R32G32B32_FLOAT.
150 */
151 if (base_format == GL_YCBCR_MESA || format == MESA_FORMAT_RGB_FLOAT32)
152 return 2;
153
154 return 4;
155 }
156
157 return 2;
158 }
159
160 static void
161 brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
162 {
163 unsigned x = 0;
164 unsigned y = 0;
165 unsigned width = mt->physical_width0;
166 unsigned height = mt->physical_height0;
167 unsigned depth = mt->physical_depth0; /* number of array layers. */
168
169 mt->total_width = mt->physical_width0;
170
171 if (mt->compressed) {
172 mt->total_width = ALIGN(mt->physical_width0, mt->align_w);
173 }
174
175 /* May need to adjust width to accomodate the placement of
176 * the 2nd mipmap. This occurs when the alignment
177 * constraints of mipmap placement push the right edge of the
178 * 2nd mipmap out past the width of its parent.
179 */
180 if (mt->first_level != mt->last_level) {
181 unsigned mip1_width;
182
183 if (mt->compressed) {
184 mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
185 ALIGN(minify(mt->physical_width0, 2), mt->align_w);
186 } else {
187 mip1_width = ALIGN(minify(mt->physical_width0, 1), mt->align_w) +
188 minify(mt->physical_width0, 2);
189 }
190
191 if (mip1_width > mt->total_width) {
192 mt->total_width = mip1_width;
193 }
194 }
195
196 mt->total_height = 0;
197
198 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
199 unsigned img_height;
200
201 intel_miptree_set_level_info(mt, level, x, y, depth);
202
203 img_height = ALIGN(height, mt->align_h);
204 if (mt->compressed)
205 img_height /= mt->align_h;
206
207 if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
208 /* Compact arrays with separated miplevels */
209 img_height *= depth;
210 }
211
212 /* Because the images are packed better, the final offset
213 * might not be the maximal one:
214 */
215 mt->total_height = MAX2(mt->total_height, y + img_height);
216
217 /* Layout_below: step right after second mipmap.
218 */
219 if (level == mt->first_level + 1) {
220 x += ALIGN(width, mt->align_w);
221 } else {
222 y += img_height;
223 }
224
225 width = minify(width, 1);
226 height = minify(height, 1);
227 }
228 }
229
230 static void
231 align_cube(struct intel_mipmap_tree *mt)
232 {
233 /* The 965's sampler lays cachelines out according to how accesses
234 * in the texture surfaces run, so they may be "vertical" through
235 * memory. As a result, the docs say in Surface Padding Requirements:
236 * Sampling Engine Surfaces that two extra rows of padding are required.
237 */
238 if (mt->target == GL_TEXTURE_CUBE_MAP)
239 mt->total_height += 2;
240 }
241
242 static void
243 brw_miptree_layout_texture_array(struct brw_context *brw,
244 struct intel_mipmap_tree *mt)
245 {
246 int h0, h1;
247 unsigned height = mt->physical_height0;
248
249 h0 = ALIGN(mt->physical_height0, mt->align_h);
250 h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
251 if (mt->array_layout == ALL_SLICES_AT_EACH_LOD)
252 mt->qpitch = h0;
253 else
254 mt->qpitch = (h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h);
255
256 int physical_qpitch = mt->compressed ? mt->qpitch / 4 : mt->qpitch;
257
258 brw_miptree_layout_2d(mt);
259
260 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
261 unsigned img_height;
262 img_height = ALIGN(height, mt->align_h);
263 if (mt->compressed)
264 img_height /= mt->align_h;
265
266 for (int q = 0; q < mt->physical_depth0; q++) {
267 if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
268 intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
269 } else {
270 intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
271 }
272 }
273 height = minify(height, 1);
274 }
275 if (mt->array_layout == ALL_LOD_IN_EACH_SLICE)
276 mt->total_height = physical_qpitch * mt->physical_depth0;
277
278 align_cube(mt);
279 }
280
281 static void
282 brw_miptree_layout_texture_3d(struct brw_context *brw,
283 struct intel_mipmap_tree *mt)
284 {
285 unsigned yscale = mt->compressed ? 4 : 1;
286
287 mt->total_width = 0;
288 mt->total_height = 0;
289
290 unsigned ysum = 0;
291 for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
292 unsigned WL = MAX2(mt->physical_width0 >> level, 1);
293 unsigned HL = MAX2(mt->physical_height0 >> level, 1);
294 unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
295 unsigned wL = ALIGN(WL, mt->align_w);
296 unsigned hL = ALIGN(HL, mt->align_h);
297
298 if (mt->target == GL_TEXTURE_CUBE_MAP)
299 DL = 6;
300
301 intel_miptree_set_level_info(mt, level, 0, 0, DL);
302
303 for (unsigned q = 0; q < DL; q++) {
304 unsigned x = (q % (1 << level)) * wL;
305 unsigned y = ysum + (q >> level) * hL;
306
307 intel_miptree_set_image_offset(mt, level, q, x, y / yscale);
308 mt->total_width = MAX2(mt->total_width, x + wL);
309 mt->total_height = MAX2(mt->total_height, (y + hL) / yscale);
310 }
311
312 ysum += ALIGN(DL, 1 << level) / (1 << level) * hL;
313 }
314
315 align_cube(mt);
316 }
317
318 void
319 brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
320 {
321 bool multisampled = mt->num_samples > 1;
322 bool gen6_hiz_or_stencil = false;
323
324 if (brw->gen == 6 && mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
325 const GLenum base_format = _mesa_get_format_base_format(mt->format);
326 gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
327 }
328
329 if (gen6_hiz_or_stencil) {
330 /* On gen6, we use ALL_SLICES_AT_EACH_LOD for stencil/hiz because the
331 * hardware doesn't support multiple mip levels on stencil/hiz.
332 *
333 * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
334 * "The hierarchical depth buffer does not support the LOD field"
335 *
336 * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
337 * "The stencil depth buffer does not support the LOD field"
338 */
339 if (mt->format == MESA_FORMAT_S_UINT8) {
340 /* Stencil uses W tiling, so we force W tiling alignment for the
341 * ALL_SLICES_AT_EACH_LOD miptree layout.
342 */
343 mt->align_w = 64;
344 mt->align_h = 64;
345 } else {
346 /* Depth uses Y tiling, so we force need Y tiling alignment for the
347 * ALL_SLICES_AT_EACH_LOD miptree layout.
348 */
349 mt->align_w = 128 / mt->cpp;
350 mt->align_h = 32;
351 }
352 } else {
353 mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt->format);
354 mt->align_h =
355 intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
356 }
357
358 switch (mt->target) {
359 case GL_TEXTURE_CUBE_MAP:
360 if (brw->gen == 4) {
361 /* Gen4 stores cube maps as 3D textures. */
362 assert(mt->physical_depth0 == 6);
363 brw_miptree_layout_texture_3d(brw, mt);
364 } else {
365 /* All other hardware stores cube maps as 2D arrays. */
366 brw_miptree_layout_texture_array(brw, mt);
367 }
368 break;
369
370 case GL_TEXTURE_3D:
371 brw_miptree_layout_texture_3d(brw, mt);
372 break;
373
374 case GL_TEXTURE_1D_ARRAY:
375 case GL_TEXTURE_2D_ARRAY:
376 case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
377 case GL_TEXTURE_CUBE_MAP_ARRAY:
378 brw_miptree_layout_texture_array(brw, mt);
379 break;
380
381 default:
382 switch (mt->msaa_layout) {
383 case INTEL_MSAA_LAYOUT_UMS:
384 case INTEL_MSAA_LAYOUT_CMS:
385 brw_miptree_layout_texture_array(brw, mt);
386 break;
387 case INTEL_MSAA_LAYOUT_NONE:
388 case INTEL_MSAA_LAYOUT_IMS:
389 brw_miptree_layout_2d(mt);
390 break;
391 }
392 break;
393 }
394 DBG("%s: %dx%dx%d\n", __FUNCTION__,
395 mt->total_width, mt->total_height, mt->cpp);
396 }
397