tu: Move UBWC layout into fdl6_layout() and use that function.
[mesa.git] / src / freedreno / fdl / fd6_layout.c
1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018-2019 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include <stdio.h>
29
30 #include "freedreno_layout.h"
31
/* Tiling alignment requirements, indexed by cpp.  Because the index is the
 * per-pixel size including samples, the larger entries also cover the MSAA
 * 2x and 4x variants of smaller formats.
 *
 * Entries that list only pitchalign/heightalign leave the UBWC block
 * dimensions zero-initialised, i.e. no UBWC block size is recorded for
 * that cpp.
 *
 * TODO:
 *   cpp=1 UBWC needs testing at larger texture sizes
 *   missing UBWC blockwidth/blockheight for npot+64 cpp
 *   missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
 */
static const struct {
	unsigned pitchalign;
	unsigned heightalign;
	uint8_t ubwc_blockwidth;
	uint8_t ubwc_blockheight;
} tile_alignment[] = {
	/* special cases for r8g8 (looked up explicitly, cpp is never 0): */
	[0]  = { .pitchalign =  64, .heightalign = 32,
	         .ubwc_blockwidth = 16, .ubwc_blockheight = 4 },

	[1]  = { .pitchalign = 128, .heightalign = 32,
	         .ubwc_blockwidth = 16, .ubwc_blockheight = 4 },
	[2]  = { .pitchalign = 128, .heightalign = 16,
	         .ubwc_blockwidth = 16, .ubwc_blockheight = 4 },
	[3]  = { .pitchalign =  64, .heightalign = 32 },
	[4]  = { .pitchalign =  64, .heightalign = 16,
	         .ubwc_blockwidth = 16, .ubwc_blockheight = 4 },
	[6]  = { .pitchalign =  64, .heightalign = 16 },
	[8]  = { .pitchalign =  64, .heightalign = 16,
	         .ubwc_blockwidth = 8, .ubwc_blockheight = 4 },
	[12] = { .pitchalign =  64, .heightalign = 16 },
	[16] = { .pitchalign =  64, .heightalign = 16,
	         .ubwc_blockwidth = 4, .ubwc_blockheight = 4 },
	[24] = { .pitchalign =  64, .heightalign = 16 },
	[32] = { .pitchalign =  64, .heightalign = 16,
	         .ubwc_blockwidth = 4, .ubwc_blockheight = 2 },
	[48] = { .pitchalign =  64, .heightalign = 16 },
	[64] = { .pitchalign =  64, .heightalign = 16 },
};
60
/* Alignment constants used when sizing the UBWC metadata of a level. */
enum {
	/* the metadata pitch (image width in UBWC blocks) is rounded up to this */
	RGB_TILE_WIDTH_ALIGNMENT = 64,
	/* the metadata height (image height in UBWC blocks) is rounded up to this */
	RGB_TILE_HEIGHT_ALIGNMENT = 16,
	/* the per-level metadata size is rounded up to this */
	UBWC_PLANE_SIZE_ALIGNMENT = 4096,
};
64
65 /* NOTE: good way to test this is: (for example)
66 * piglit/bin/texelFetch fs sampler3D 100x100x8
67 */
68 void
69 fdl6_layout(struct fdl_layout *layout,
70 enum pipe_format format, uint32_t nr_samples,
71 uint32_t width0, uint32_t height0, uint32_t depth0,
72 uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc)
73 {
74 assert(nr_samples > 0);
75 layout->width0 = width0;
76 layout->height0 = height0;
77 layout->depth0 = depth0;
78
79 layout->cpp = util_format_get_blocksize(format);
80 layout->cpp *= nr_samples;
81
82 const struct util_format_description *format_desc =
83 util_format_description(format);
84 uint32_t depth = depth0;
85 /* linear dimensions: */
86 uint32_t lwidth = width0;
87 uint32_t lheight = height0;
88 /* tile_mode dimensions: */
89 uint32_t twidth = util_next_power_of_two(lwidth);
90 uint32_t theight = util_next_power_of_two(lheight);
91 int ta = layout->cpp;
92
93 /* The z16/r16 formats seem to not play by the normal tiling rules: */
94 if ((layout->cpp == 2) && (util_format_get_nr_components(format) == 2))
95 ta = 0;
96
97 uint32_t alignment;
98 if (is_3d) {
99 layout->layer_first = false;
100 alignment = 4096;
101 } else {
102 layout->layer_first = true;
103 alignment = 1;
104 }
105 /* in layer_first layout, the level (slice) contains just one
106 * layer (since in fact the layer contains the slices)
107 */
108 uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
109
110 debug_assert(ta < ARRAY_SIZE(tile_alignment));
111 debug_assert(tile_alignment[ta].pitchalign);
112
113 for (uint32_t level = 0; level < mip_levels; level++) {
114 struct fdl_slice *slice = &layout->slices[level];
115 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
116 uint32_t tile_mode = (ubwc ?
117 layout->tile_mode : fdl_tile_mode(layout, level));
118 uint32_t width, height;
119
120 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
121 if (is_3d && tile_mode) {
122 width = twidth;
123 height = theight;
124 } else {
125 width = lwidth;
126 height = lheight;
127 }
128 uint32_t aligned_height = height;
129 uint32_t pitchalign;
130
131 if (tile_mode) {
132 pitchalign = tile_alignment[ta].pitchalign;
133 aligned_height = align(aligned_height,
134 tile_alignment[ta].heightalign);
135 } else {
136 pitchalign = 64;
137 }
138
139 /* The blits used for mem<->gmem work at a granularity of
140 * 32x32, which can cause faults due to over-fetch on the
141 * last level. The simple solution is to over-allocate a
142 * bit the last level to ensure any over-fetch is harmless.
143 * The pitch is already sufficiently aligned, but height
144 * may not be:
145 */
146 if (level == mip_levels - 1)
147 aligned_height = align(aligned_height, 32);
148
149 if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC)
150 slice->pitch =
151 util_align_npot(width, pitchalign * util_format_get_blockwidth(format));
152 else
153 slice->pitch = align(width, pitchalign);
154
155 slice->offset = layout->size;
156 uint32_t blocks = util_format_get_nblocks(format,
157 slice->pitch, aligned_height);
158
159 /* 1d array and 2d array textures must all have the same layer size
160 * for each miplevel on a6xx. 3d textures can have different layer
161 * sizes for high levels, but the hw auto-sizer is buggy (or at least
162 * different than what this code does), so as soon as the layer size
163 * range gets into range, we stop reducing it.
164 */
165 if (is_3d) {
166 if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
167 slice->size0 = align(blocks * layout->cpp, alignment);
168 } else {
169 slice->size0 = layout->slices[level - 1].size0;
170 }
171 } else {
172 slice->size0 = align(blocks * layout->cpp, alignment);
173 }
174
175 layout->size += slice->size0 * depth * layers_in_level;
176
177 if (ubwc) {
178 /* with UBWC every level is aligned to 4K */
179 layout->size = align(layout->size, 4096);
180
181 uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
182 uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
183 uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width),
184 RGB_TILE_WIDTH_ALIGNMENT);
185 uint32_t meta_height = align(DIV_ROUND_UP(height, block_height),
186 RGB_TILE_HEIGHT_ALIGNMENT);
187
188 /* it looks like mipmaps need alignment to power of two
189 * TODO: needs testing with large npot textures
190 * (needed for the first level?)
191 */
192 if (mip_levels > 1) {
193 meta_pitch = util_next_power_of_two(meta_pitch);
194 meta_height = util_next_power_of_two(meta_height);
195 }
196
197 ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
198 ubwc_slice->pitch = meta_pitch;
199 ubwc_slice->offset = layout->ubwc_size;
200 layout->ubwc_size += ubwc_slice->size0;
201 }
202
203 depth = u_minify(depth, 1);
204 lwidth = u_minify(lwidth, 1);
205 lheight = u_minify(lheight, 1);
206 twidth = u_minify(twidth, 1);
207 theight = u_minify(theight, 1);
208 }
209
210 if (layout->layer_first) {
211 layout->layer_size = align(layout->size, 4096);
212 layout->size = layout->layer_size * array_size;
213 }
214
215 /* Place the UBWC slices before the uncompressed slices, because the
216 * kernel expects UBWC to be at the start of the buffer. In the HW, we
217 * get to program the UBWC and non-UBWC offset/strides
218 * independently.
219 */
220 if (ubwc) {
221 for (uint32_t level = 0; level < mip_levels; level++)
222 layout->slices[level].offset += layout->ubwc_size * array_size;
223 layout->size += layout->ubwc_size * array_size;
224 }
225
226 if (false) {
227 for (uint32_t level = 0; level < mip_levels; level++) {
228 struct fdl_slice *slice = &layout->slices[level];
229 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
230 uint32_t tile_mode = (ubwc ?
231 layout->tile_mode : fdl_tile_mode(layout, level));
232
233 fprintf(stderr, "%s: %ux%ux%u@%ux%u:\t%2u: stride=%4u, size=%6u,%6u, aligned_height=%3u, offset=0x%x,0x%x tiling=%d\n",
234 util_format_name(format),
235 u_minify(layout->width0, level),
236 u_minify(layout->height0, level),
237 u_minify(layout->depth0, level),
238 layout->cpp, nr_samples,
239 level,
240 slice->pitch * layout->cpp,
241 slice->size0, ubwc_slice->size0,
242 slice->size0 / (slice->pitch * layout->cpp),
243 slice->offset, ubwc_slice->offset,
244 tile_mode);
245 }
246 }
247 }
248
249 void
250 fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
251 uint32_t *blockwidth, uint32_t *blockheight)
252 {
253 *blockwidth = tile_alignment[layout->cpp].ubwc_blockwidth;
254 *blockheight = tile_alignment[layout->cpp].ubwc_blockheight;
255 }