033b5c33f996f70322523e985e0f65850c50a5c1
[mesa.git] / src / freedreno / fdl / fd6_layout.c
1 /*
2 * Copyright (C) 2018 Rob Clark <robclark@freedesktop.org>
3 * Copyright © 2018-2019 Google, Inc.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 * Authors:
25 * Rob Clark <robclark@freedesktop.org>
26 */
27
28 #include <stdio.h>
29
30 #include "freedreno_layout.h"
31
32 /* indexed by cpp, including msaa 2x and 4x:
33 * TODO:
34 * cpp=1 UBWC needs testing at larger texture sizes
35 * missing UBWC blockwidth/blockheight for npot+64 cpp
36 * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
37 */
static const struct {
	/* value stored in layout->base_align for tiled layouts */
	unsigned basealign;
	/* alignment applied to the pitch of tiled levels */
	unsigned pitchalign;
	/* alignment applied to the height of tiled levels */
	unsigned heightalign;
	/* width/height are divided by these to size the UBWC metadata;
	 * a zero blockwidth means UBWC is not used for that cpp
	 */
	uint8_t ubwc_blockwidth;
	uint8_t ubwc_blockheight;
} tile_alignment[] = {
	/* special cases for r8g8: */
	[0]  = { 256,  64, 32, 16, 4 },

	/* cpp     base pitch height  ubwc w/h */
	[1]  = {  64, 128, 32, 16, 4 },
	[2]  = { 128, 128, 16, 16, 4 },
	[3]  = { 256,  64, 32,  0, 0 },
	[4]  = { 256,  64, 16, 16, 4 },
	[6]  = { 256,  64, 16,  0, 0 },
	[8]  = { 256,  64, 16,  8, 4 },
	[12] = { 256,  64, 16,  0, 0 },
	[16] = { 256,  64, 16,  4, 4 },
	[24] = { 256,  64, 16,  0, 0 },
	[32] = { 256,  64, 16,  4, 2 },
	[48] = { 256,  64, 16,  0, 0 },
	[64] = { 256,  64, 16,  0, 0 },
};
61
/* Alignments used by fdl6_layout() when sizing the UBWC metadata of a level:
 * the metadata pitch and height are rounded up to the first two values, and
 * the resulting per-level metadata size to the third.
 */
#define RGB_TILE_WIDTH_ALIGNMENT    64
#define RGB_TILE_HEIGHT_ALIGNMENT   16
#define UBWC_PLANE_SIZE_ALIGNMENT   4096
65
66 static int
67 fdl6_pitchalign(struct fdl_layout *layout, int ta, int level)
68 {
69 const struct util_format_description *format_desc =
70 util_format_description(layout->format);
71
72 uint32_t pitchalign = 64;
73 if (fdl_tile_mode(layout, level))
74 pitchalign = tile_alignment[ta].pitchalign;
75 if (format_desc->layout == UTIL_FORMAT_LAYOUT_ASTC)
76 pitchalign *= util_format_get_blockwidth(layout->format);
77 return pitchalign;
78 }
79
80 /* NOTE: good way to test this is: (for example)
81 * piglit/bin/texelFetch fs sampler3D 100x100x8
82 */
83 void
84 fdl6_layout(struct fdl_layout *layout,
85 enum pipe_format format, uint32_t nr_samples,
86 uint32_t width0, uint32_t height0, uint32_t depth0,
87 uint32_t mip_levels, uint32_t array_size, bool is_3d)
88 {
89 assert(nr_samples > 0);
90 layout->width0 = width0;
91 layout->height0 = height0;
92 layout->depth0 = depth0;
93
94 layout->cpp = util_format_get_blocksize(format);
95 layout->cpp *= nr_samples;
96 layout->cpp_shift = ffs(layout->cpp) - 1;
97
98 layout->format = format;
99 layout->nr_samples = nr_samples;
100 layout->layer_first = !is_3d;
101
102 if (depth0 > 1)
103 layout->ubwc = false;
104 if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
105 layout->ubwc = false;
106
107 int ta = layout->cpp;
108
109 /* The z16/r16 formats seem to not play by the normal tiling rules: */
110 if ((layout->cpp == 2) && (util_format_get_nr_components(format) == 2))
111 ta = 0;
112
113 /* in layer_first layout, the level (slice) contains just one
114 * layer (since in fact the layer contains the slices)
115 */
116 uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
117
118 debug_assert(ta < ARRAY_SIZE(tile_alignment));
119 debug_assert(tile_alignment[ta].pitchalign);
120
121 if (layout->tile_mode) {
122 layout->base_align = tile_alignment[ta].basealign;
123 } else {
124 layout->base_align = 64;
125 }
126
127 uint32_t pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, ta, 0));
128
129 for (uint32_t level = 0; level < mip_levels; level++) {
130 uint32_t depth = u_minify(depth0, level);
131 struct fdl_slice *slice = &layout->slices[level];
132 struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
133 uint32_t tile_mode = fdl_tile_mode(layout, level);
134 uint32_t width, height;
135
136 /* tiled levels of 3D textures are rounded up to PoT dimensions: */
137 if (is_3d && tile_mode) {
138 width = u_minify(util_next_power_of_two(width0), level);
139 height = u_minify(util_next_power_of_two(height0), level);
140 } else {
141 width = u_minify(width0, level);
142 height = u_minify(height0, level);
143 }
144
145 if (tile_mode)
146 height = align(height, tile_alignment[ta].heightalign);
147
148 /* The blits used for mem<->gmem work at a granularity of
149 * 32x32, which can cause faults due to over-fetch on the
150 * last level. The simple solution is to over-allocate a
151 * bit the last level to ensure any over-fetch is harmless.
152 * The pitch is already sufficiently aligned, but height
153 * may not be:
154 */
155 if (level == mip_levels - 1)
156 height = align(height, 32);
157
158 uint32_t pitch_pixels = util_align_npot(u_minify(pitch0, level),
159 fdl6_pitchalign(layout, ta, level));
160
161 slice->offset = layout->size;
162 uint32_t blocks = util_format_get_nblocks(format,
163 pitch_pixels, height);
164
165 slice->pitch = util_format_get_nblocksx(format, pitch_pixels) *
166 layout->cpp;
167
168 /* 1d array and 2d array textures must all have the same layer size
169 * for each miplevel on a6xx. 3d textures can have different layer
170 * sizes for high levels, but the hw auto-sizer is buggy (or at least
171 * different than what this code does), so as soon as the layer size
172 * range gets into range, we stop reducing it.
173 */
174 if (is_3d) {
175 if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
176 slice->size0 = align(blocks * layout->cpp, 4096);
177 } else {
178 slice->size0 = layout->slices[level - 1].size0;
179 }
180 } else {
181 slice->size0 = blocks * layout->cpp;
182 }
183
184 layout->size += slice->size0 * depth * layers_in_level;
185
186 if (layout->ubwc) {
187 /* with UBWC every level is aligned to 4K */
188 layout->size = align(layout->size, 4096);
189
190 uint32_t block_width = tile_alignment[ta].ubwc_blockwidth;
191 uint32_t block_height = tile_alignment[ta].ubwc_blockheight;
192 uint32_t meta_pitch = align(DIV_ROUND_UP(width, block_width),
193 RGB_TILE_WIDTH_ALIGNMENT);
194 uint32_t meta_height = align(DIV_ROUND_UP(height, block_height),
195 RGB_TILE_HEIGHT_ALIGNMENT);
196
197 /* it looks like mipmaps need alignment to power of two
198 * TODO: needs testing with large npot textures
199 * (needed for the first level?)
200 */
201 if (mip_levels > 1) {
202 meta_pitch = util_next_power_of_two(meta_pitch);
203 meta_height = util_next_power_of_two(meta_height);
204 }
205
206 ubwc_slice->size0 = align(meta_pitch * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
207 ubwc_slice->pitch = meta_pitch;
208 ubwc_slice->offset = layout->ubwc_layer_size;
209 layout->ubwc_layer_size += ubwc_slice->size0;
210 }
211 }
212
213 if (layout->layer_first) {
214 layout->layer_size = align(layout->size, 4096);
215 layout->size = layout->layer_size * array_size;
216 }
217
218 /* Place the UBWC slices before the uncompressed slices, because the
219 * kernel expects UBWC to be at the start of the buffer. In the HW, we
220 * get to program the UBWC and non-UBWC offset/strides
221 * independently.
222 */
223 if (layout->ubwc) {
224 for (uint32_t level = 0; level < mip_levels; level++)
225 layout->slices[level].offset += layout->ubwc_layer_size * array_size;
226 layout->size += layout->ubwc_layer_size * array_size;
227 }
228 }
229
230 void
231 fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
232 uint32_t *blockwidth, uint32_t *blockheight)
233 {
234 *blockwidth = tile_alignment[layout->cpp].ubwc_blockwidth;
235 *blockheight = tile_alignment[layout->cpp].ubwc_blockheight;
236 }