i965: Make the CopyImage BLT path bail for stencil images.
[mesa.git] / src / mesa / drivers / dri / i965 / intel_copy_image.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 Intel Corporation All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Jason Ekstrand <jason.ekstrand@intel.com>
26 */
27
28 #include "intel_fbo.h"
29 #include "intel_tex.h"
30 #include "intel_blit.h"
31 #include "intel_mipmap_tree.h"
32 #include "main/formats.h"
33 #include "main/teximage.h"
34 #include "drivers/common/meta.h"
35
36 static bool
37 copy_image_with_blitter(struct brw_context *brw,
38 struct intel_mipmap_tree *src_mt, int src_level,
39 int src_x, int src_y, int src_z,
40 struct intel_mipmap_tree *dst_mt, int dst_level,
41 int dst_x, int dst_y, int dst_z,
42 int src_width, int src_height)
43 {
44 GLuint bw, bh;
45 uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
46
47 /* The blitter doesn't understand multisampling at all. */
48 if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
49 return false;
50
51 if (src_mt->format == MESA_FORMAT_S_UINT8)
52 return false;
53
54 /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
55 * Data Size Limitations):
56 *
57 * The BLT engine is capable of transferring very large quantities of
58 * graphics data. Any graphics data read from and written to the
59 * destination is permitted to represent a number of pixels that
60 * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
61 * at the destination. The maximum number of pixels that may be
62 * represented per scan line’s worth of graphics data depends on the
63 * color depth.
64 *
65 * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
66 * 16-bit integer to represent buffer pitch, so it can only handle buffer
67 * pitches < 32k.
68 *
69 * As a result of these two limitations, we can only use the blitter to do
70 * this copy when the miptree's pitch is less than 32k.
71 */
72 if (src_mt->pitch >= 32768 ||
73 dst_mt->pitch >= 32768) {
74 perf_debug("Falling back due to >=32k pitch\n");
75 return false;
76 }
77
78 intel_miptree_get_image_offset(src_mt, src_level, src_z,
79 &src_image_x, &src_image_y);
80
81 if (_mesa_is_format_compressed(src_mt->format)) {
82 _mesa_get_format_block_size(src_mt->format, &bw, &bh);
83
84 assert(src_x % bw == 0);
85 assert(src_y % bh == 0);
86 assert(src_width % bw == 0);
87 assert(src_height % bh == 0);
88
89 src_x /= (int)bw;
90 src_y /= (int)bh;
91 src_width /= (int)bw;
92 src_height /= (int)bh;
93 }
94 src_x += src_image_x;
95 src_y += src_image_y;
96
97 intel_miptree_get_image_offset(dst_mt, dst_level, dst_z,
98 &dst_image_x, &dst_image_y);
99
100 if (_mesa_is_format_compressed(dst_mt->format)) {
101 _mesa_get_format_block_size(dst_mt->format, &bw, &bh);
102
103 assert(dst_x % bw == 0);
104 assert(dst_y % bh == 0);
105
106 dst_x /= (int)bw;
107 dst_y /= (int)bh;
108 }
109 dst_x += dst_image_x;
110 dst_y += dst_image_y;
111
112 return intelEmitCopyBlit(brw,
113 src_mt->cpp,
114 src_mt->pitch,
115 src_mt->bo, src_mt->offset,
116 src_mt->tiling,
117 src_mt->tr_mode,
118 dst_mt->pitch,
119 dst_mt->bo, dst_mt->offset,
120 dst_mt->tiling,
121 dst_mt->tr_mode,
122 src_x, src_y,
123 dst_x, dst_y,
124 src_width, src_height,
125 GL_COPY);
126 }
127
128 static void
129 copy_image_with_memcpy(struct brw_context *brw,
130 struct intel_mipmap_tree *src_mt, int src_level,
131 int src_x, int src_y, int src_z,
132 struct intel_mipmap_tree *dst_mt, int dst_level,
133 int dst_x, int dst_y, int dst_z,
134 int src_width, int src_height)
135 {
136 bool same_slice;
137 void *mapped, *src_mapped, *dst_mapped;
138 ptrdiff_t src_stride, dst_stride, cpp;
139 int map_x1, map_y1, map_x2, map_y2;
140 GLuint src_bw, src_bh;
141
142 cpp = _mesa_get_format_bytes(src_mt->format);
143 _mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh);
144
145 assert(src_width % src_bw == 0);
146 assert(src_height % src_bh == 0);
147 assert(src_x % src_bw == 0);
148 assert(src_y % src_bh == 0);
149
150 /* If we are on the same miptree, same level, and same slice, then
151 * intel_miptree_map won't let us map it twice. We have to do things a
152 * bit differently. In particular, we do a single map large enough for
153 * both portions and in read-write mode.
154 */
155 same_slice = src_mt == dst_mt && src_level == dst_level && src_z == dst_z;
156
157 if (same_slice) {
158 assert(dst_x % src_bw == 0);
159 assert(dst_y % src_bh == 0);
160
161 map_x1 = MIN2(src_x, dst_x);
162 map_y1 = MIN2(src_y, dst_y);
163 map_x2 = MAX2(src_x, dst_x) + src_width;
164 map_y2 = MAX2(src_y, dst_y) + src_height;
165
166 intel_miptree_map(brw, src_mt, src_level, src_z,
167 map_x1, map_y1, map_x2 - map_x1, map_y2 - map_y1,
168 GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
169 &mapped, &src_stride);
170
171 dst_stride = src_stride;
172
173 /* Set the offsets here so we don't have to think about while looping */
174 src_mapped = mapped + ((src_y - map_y1) / src_bh) * src_stride +
175 ((src_x - map_x1) / src_bw) * cpp;
176 dst_mapped = mapped + ((dst_y - map_y1) / src_bh) * dst_stride +
177 ((dst_x - map_x1) / src_bw) * cpp;
178 } else {
179 intel_miptree_map(brw, src_mt, src_level, src_z,
180 src_x, src_y, src_width, src_height,
181 GL_MAP_READ_BIT, &src_mapped, &src_stride);
182 intel_miptree_map(brw, dst_mt, dst_level, dst_z,
183 dst_x, dst_y, src_width, src_height,
184 GL_MAP_WRITE_BIT, &dst_mapped, &dst_stride);
185 }
186
187 src_width /= (int)src_bw;
188 src_height /= (int)src_bh;
189
190 for (int i = 0; i < src_height; ++i) {
191 memcpy(dst_mapped, src_mapped, src_width * cpp);
192 src_mapped += src_stride;
193 dst_mapped += dst_stride;
194 }
195
196 if (same_slice) {
197 intel_miptree_unmap(brw, src_mt, src_level, src_z);
198 } else {
199 intel_miptree_unmap(brw, dst_mt, dst_level, dst_z);
200 intel_miptree_unmap(brw, src_mt, src_level, src_z);
201 }
202 }
203
204 static void
205 copy_miptrees(struct brw_context *brw,
206 struct intel_mipmap_tree *src_mt,
207 int src_x, int src_y, int src_z, unsigned src_level,
208 struct intel_mipmap_tree *dst_mt,
209 int dst_x, int dst_y, int dst_z, unsigned dst_level,
210 int src_width, int src_height)
211 {
212 unsigned bw, bh;
213
214 /* We are now going to try and copy the texture using the blitter. If
215 * that fails, we will fall back mapping the texture and using memcpy.
216 * In either case, we need to do a full resolve.
217 */
218 intel_miptree_all_slices_resolve_hiz(brw, src_mt);
219 intel_miptree_all_slices_resolve_depth(brw, src_mt);
220 intel_miptree_resolve_color(brw, src_mt, 0);
221
222 intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
223 intel_miptree_all_slices_resolve_depth(brw, dst_mt);
224 intel_miptree_resolve_color(brw, dst_mt, 0);
225
226 _mesa_get_format_block_size(src_mt->format, &bw, &bh);
227
228 /* It's legal to have a WxH that's smaller than a compressed block. This
229 * happens for example when you are using a higher level LOD. For this case,
230 * we still want to copy the entire block, or else the decompression will be
231 * incorrect.
232 */
233 if (src_width < bw)
234 src_width = ALIGN_NPOT(src_width, bw);
235
236 if (src_height < bh)
237 src_height = ALIGN_NPOT(src_height, bh);
238
239 if (copy_image_with_blitter(brw, src_mt, src_level,
240 src_x, src_y, src_z,
241 dst_mt, dst_level,
242 dst_x, dst_y, dst_z,
243 src_width, src_height))
244 return;
245
246 /* This is a worst-case scenario software fallback that maps the two
247 * textures and does a memcpy between them.
248 */
249 copy_image_with_memcpy(brw, src_mt, src_level,
250 src_x, src_y, src_z,
251 dst_mt, dst_level,
252 dst_x, dst_y, dst_z,
253 src_width, src_height);
254 }
255
256 static void
257 intel_copy_image_sub_data(struct gl_context *ctx,
258 struct gl_texture_image *src_image,
259 struct gl_renderbuffer *src_renderbuffer,
260 int src_x, int src_y, int src_z,
261 struct gl_texture_image *dst_image,
262 struct gl_renderbuffer *dst_renderbuffer,
263 int dst_x, int dst_y, int dst_z,
264 int src_width, int src_height)
265 {
266 struct brw_context *brw = brw_context(ctx);
267 struct intel_mipmap_tree *src_mt, *dst_mt;
268 unsigned src_level, dst_level;
269
270 if (_mesa_meta_CopyImageSubData_uncompressed(ctx,
271 src_image, src_renderbuffer,
272 src_x, src_y, src_z,
273 dst_image, dst_renderbuffer,
274 dst_x, dst_y, dst_z,
275 src_width, src_height)) {
276 return;
277 }
278
279 if (src_image) {
280 src_mt = intel_texture_image(src_image)->mt;
281 src_level = src_image->Level + src_image->TexObject->MinLevel;
282
283 /* Cube maps actually have different images per face */
284 if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
285 src_z = src_image->Face;
286
287 src_z += src_image->TexObject->MinLayer;
288 } else {
289 assert(src_renderbuffer);
290 src_mt = intel_renderbuffer(src_renderbuffer)->mt;
291 src_image = src_renderbuffer->TexImage;
292 src_level = 0;
293 }
294
295 if (dst_image) {
296 dst_mt = intel_texture_image(dst_image)->mt;
297
298 dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
299
300 /* Cube maps actually have different images per face */
301 if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
302 dst_z = dst_image->Face;
303
304 dst_z += dst_image->TexObject->MinLayer;
305 } else {
306 assert(dst_renderbuffer);
307 dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
308 dst_image = dst_renderbuffer->TexImage;
309 dst_level = 0;
310 }
311
312 if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
313 _mesa_problem(ctx, "Failed to copy multisampled texture with BLORP\n");
314 return;
315 }
316
317 copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level,
318 dst_mt, dst_x, dst_y, dst_z, dst_level,
319 src_width, src_height);
320
321 /* CopyImage only works for equal formats, texture view equivalence
322 * classes, and a couple special cases for compressed textures.
323 *
324 * Notably, GL_DEPTH_STENCIL does not appear in any equivalence
325 * classes, so we know the formats must be the same, and thus both
326 * will either have stencil, or not. They can't be mismatched.
327 */
328 assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL));
329
330 if (dst_mt->stencil_mt) {
331 copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level,
332 dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level,
333 src_width, src_height);
334 }
335 }
336
337 void
338 intelInitCopyImageFuncs(struct dd_function_table *functions)
339 {
340 functions->CopyImageSubData = intel_copy_image_sub_data;
341 }