i965: Add new interface for full color resolves
[mesa.git] / src / mesa / drivers / dri / i965 / intel_copy_image.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 Intel Corporation All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Jason Ekstrand <jason.ekstrand@intel.com>
26 */
27
28 #include "brw_blorp.h"
29 #include "intel_fbo.h"
30 #include "intel_tex.h"
31 #include "intel_blit.h"
32 #include "intel_mipmap_tree.h"
33 #include "main/formats.h"
34 #include "main/teximage.h"
35 #include "drivers/common/meta.h"
36
37 static bool
38 copy_image_with_blitter(struct brw_context *brw,
39 struct intel_mipmap_tree *src_mt, int src_level,
40 int src_x, int src_y, int src_z,
41 struct intel_mipmap_tree *dst_mt, int dst_level,
42 int dst_x, int dst_y, int dst_z,
43 int src_width, int src_height)
44 {
45 GLuint bw, bh;
46 uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
47
48 /* The blitter doesn't understand multisampling at all. */
49 if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
50 return false;
51
52 if (src_mt->format == MESA_FORMAT_S_UINT8)
53 return false;
54
55 /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
56 * Data Size Limitations):
57 *
58 * The BLT engine is capable of transferring very large quantities of
59 * graphics data. Any graphics data read from and written to the
60 * destination is permitted to represent a number of pixels that
61 * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
62 * at the destination. The maximum number of pixels that may be
63 * represented per scan line’s worth of graphics data depends on the
64 * color depth.
65 *
66 * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
67 * 16-bit integer to represent buffer pitch, so it can only handle buffer
68 * pitches < 32k.
69 *
70 * As a result of these two limitations, we can only use the blitter to do
71 * this copy when the miptree's pitch is less than 32k.
72 */
73 if (src_mt->pitch >= 32768 ||
74 dst_mt->pitch >= 32768) {
75 perf_debug("Falling back due to >=32k pitch\n");
76 return false;
77 }
78
79 intel_miptree_get_image_offset(src_mt, src_level, src_z,
80 &src_image_x, &src_image_y);
81
82 if (_mesa_is_format_compressed(src_mt->format)) {
83 _mesa_get_format_block_size(src_mt->format, &bw, &bh);
84
85 assert(src_x % bw == 0);
86 assert(src_y % bh == 0);
87 assert(src_width % bw == 0);
88 assert(src_height % bh == 0);
89
90 src_x /= (int)bw;
91 src_y /= (int)bh;
92 src_width /= (int)bw;
93 src_height /= (int)bh;
94 }
95 src_x += src_image_x;
96 src_y += src_image_y;
97
98 intel_miptree_get_image_offset(dst_mt, dst_level, dst_z,
99 &dst_image_x, &dst_image_y);
100
101 if (_mesa_is_format_compressed(dst_mt->format)) {
102 _mesa_get_format_block_size(dst_mt->format, &bw, &bh);
103
104 assert(dst_x % bw == 0);
105 assert(dst_y % bh == 0);
106
107 dst_x /= (int)bw;
108 dst_y /= (int)bh;
109 }
110 dst_x += dst_image_x;
111 dst_y += dst_image_y;
112
113 return intelEmitCopyBlit(brw,
114 src_mt->cpp,
115 src_mt->pitch,
116 src_mt->bo, src_mt->offset,
117 src_mt->tiling,
118 src_mt->tr_mode,
119 dst_mt->pitch,
120 dst_mt->bo, dst_mt->offset,
121 dst_mt->tiling,
122 dst_mt->tr_mode,
123 src_x, src_y,
124 dst_x, dst_y,
125 src_width, src_height,
126 GL_COPY);
127 }
128
129 static void
130 copy_image_with_memcpy(struct brw_context *brw,
131 struct intel_mipmap_tree *src_mt, int src_level,
132 int src_x, int src_y, int src_z,
133 struct intel_mipmap_tree *dst_mt, int dst_level,
134 int dst_x, int dst_y, int dst_z,
135 int src_width, int src_height)
136 {
137 bool same_slice;
138 void *mapped, *src_mapped, *dst_mapped;
139 ptrdiff_t src_stride, dst_stride, cpp;
140 int map_x1, map_y1, map_x2, map_y2;
141 GLuint src_bw, src_bh;
142
143 cpp = _mesa_get_format_bytes(src_mt->format);
144 _mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh);
145
146 assert(src_width % src_bw == 0);
147 assert(src_height % src_bh == 0);
148 assert(src_x % src_bw == 0);
149 assert(src_y % src_bh == 0);
150
151 /* If we are on the same miptree, same level, and same slice, then
152 * intel_miptree_map won't let us map it twice. We have to do things a
153 * bit differently. In particular, we do a single map large enough for
154 * both portions and in read-write mode.
155 */
156 same_slice = src_mt == dst_mt && src_level == dst_level && src_z == dst_z;
157
158 if (same_slice) {
159 assert(dst_x % src_bw == 0);
160 assert(dst_y % src_bh == 0);
161
162 map_x1 = MIN2(src_x, dst_x);
163 map_y1 = MIN2(src_y, dst_y);
164 map_x2 = MAX2(src_x, dst_x) + src_width;
165 map_y2 = MAX2(src_y, dst_y) + src_height;
166
167 intel_miptree_map(brw, src_mt, src_level, src_z,
168 map_x1, map_y1, map_x2 - map_x1, map_y2 - map_y1,
169 GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
170 &mapped, &src_stride);
171
172 dst_stride = src_stride;
173
174 /* Set the offsets here so we don't have to think about while looping */
175 src_mapped = mapped + ((src_y - map_y1) / src_bh) * src_stride +
176 ((src_x - map_x1) / src_bw) * cpp;
177 dst_mapped = mapped + ((dst_y - map_y1) / src_bh) * dst_stride +
178 ((dst_x - map_x1) / src_bw) * cpp;
179 } else {
180 intel_miptree_map(brw, src_mt, src_level, src_z,
181 src_x, src_y, src_width, src_height,
182 GL_MAP_READ_BIT, &src_mapped, &src_stride);
183 intel_miptree_map(brw, dst_mt, dst_level, dst_z,
184 dst_x, dst_y, src_width, src_height,
185 GL_MAP_WRITE_BIT, &dst_mapped, &dst_stride);
186 }
187
188 src_width /= (int)src_bw;
189 src_height /= (int)src_bh;
190
191 for (int i = 0; i < src_height; ++i) {
192 memcpy(dst_mapped, src_mapped, src_width * cpp);
193 src_mapped += src_stride;
194 dst_mapped += dst_stride;
195 }
196
197 if (same_slice) {
198 intel_miptree_unmap(brw, src_mt, src_level, src_z);
199 } else {
200 intel_miptree_unmap(brw, dst_mt, dst_level, dst_z);
201 intel_miptree_unmap(brw, src_mt, src_level, src_z);
202 }
203 }
204
205 static void
206 copy_miptrees(struct brw_context *brw,
207 struct intel_mipmap_tree *src_mt,
208 int src_x, int src_y, int src_z, unsigned src_level,
209 struct intel_mipmap_tree *dst_mt,
210 int dst_x, int dst_y, int dst_z, unsigned dst_level,
211 int src_width, int src_height)
212 {
213 unsigned bw, bh;
214
215 if (brw->gen >= 6) {
216 brw_blorp_copy_miptrees(brw,
217 src_mt, src_level, src_z,
218 dst_mt, dst_level, dst_z,
219 src_x, src_y, dst_x, dst_y,
220 src_width, src_height);
221 return;
222 }
223
224 /* We are now going to try and copy the texture using the blitter. If
225 * that fails, we will fall back mapping the texture and using memcpy.
226 * In either case, we need to do a full resolve.
227 */
228 intel_miptree_all_slices_resolve_hiz(brw, src_mt);
229 intel_miptree_all_slices_resolve_depth(brw, src_mt);
230 intel_miptree_all_slices_resolve_color(brw, src_mt, 0);
231
232 intel_miptree_all_slices_resolve_hiz(brw, dst_mt);
233 intel_miptree_all_slices_resolve_depth(brw, dst_mt);
234 intel_miptree_all_slices_resolve_color(brw, dst_mt, 0);
235
236 _mesa_get_format_block_size(src_mt->format, &bw, &bh);
237
238 /* It's legal to have a WxH that's smaller than a compressed block. This
239 * happens for example when you are using a higher level LOD. For this case,
240 * we still want to copy the entire block, or else the decompression will be
241 * incorrect.
242 */
243 if (src_width < bw)
244 src_width = ALIGN_NPOT(src_width, bw);
245
246 if (src_height < bh)
247 src_height = ALIGN_NPOT(src_height, bh);
248
249 if (copy_image_with_blitter(brw, src_mt, src_level,
250 src_x, src_y, src_z,
251 dst_mt, dst_level,
252 dst_x, dst_y, dst_z,
253 src_width, src_height))
254 return;
255
256 /* This is a worst-case scenario software fallback that maps the two
257 * textures and does a memcpy between them.
258 */
259 copy_image_with_memcpy(brw, src_mt, src_level,
260 src_x, src_y, src_z,
261 dst_mt, dst_level,
262 dst_x, dst_y, dst_z,
263 src_width, src_height);
264 }
265
266 static void
267 intel_copy_image_sub_data(struct gl_context *ctx,
268 struct gl_texture_image *src_image,
269 struct gl_renderbuffer *src_renderbuffer,
270 int src_x, int src_y, int src_z,
271 struct gl_texture_image *dst_image,
272 struct gl_renderbuffer *dst_renderbuffer,
273 int dst_x, int dst_y, int dst_z,
274 int src_width, int src_height)
275 {
276 struct brw_context *brw = brw_context(ctx);
277 struct intel_mipmap_tree *src_mt, *dst_mt;
278 unsigned src_level, dst_level;
279
280 if (brw->gen < 6 &&
281 _mesa_meta_CopyImageSubData_uncompressed(ctx,
282 src_image, src_renderbuffer,
283 src_x, src_y, src_z,
284 dst_image, dst_renderbuffer,
285 dst_x, dst_y, dst_z,
286 src_width, src_height)) {
287 return;
288 }
289
290 if (src_image) {
291 src_mt = intel_texture_image(src_image)->mt;
292 src_level = src_image->Level + src_image->TexObject->MinLevel;
293
294 /* Cube maps actually have different images per face */
295 if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
296 src_z = src_image->Face;
297
298 src_z += src_image->TexObject->MinLayer;
299 } else {
300 assert(src_renderbuffer);
301 src_mt = intel_renderbuffer(src_renderbuffer)->mt;
302 src_image = src_renderbuffer->TexImage;
303 src_level = 0;
304 }
305
306 if (dst_image) {
307 dst_mt = intel_texture_image(dst_image)->mt;
308
309 dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
310
311 /* Cube maps actually have different images per face */
312 if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
313 dst_z = dst_image->Face;
314
315 dst_z += dst_image->TexObject->MinLayer;
316 } else {
317 assert(dst_renderbuffer);
318 dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
319 dst_image = dst_renderbuffer->TexImage;
320 dst_level = 0;
321 }
322
323 copy_miptrees(brw, src_mt, src_x, src_y, src_z, src_level,
324 dst_mt, dst_x, dst_y, dst_z, dst_level,
325 src_width, src_height);
326
327 /* CopyImage only works for equal formats, texture view equivalence
328 * classes, and a couple special cases for compressed textures.
329 *
330 * Notably, GL_DEPTH_STENCIL does not appear in any equivalence
331 * classes, so we know the formats must be the same, and thus both
332 * will either have stencil, or not. They can't be mismatched.
333 */
334 assert((src_mt->stencil_mt != NULL) == (dst_mt->stencil_mt != NULL));
335
336 if (dst_mt->stencil_mt) {
337 copy_miptrees(brw, src_mt->stencil_mt, src_x, src_y, src_z, src_level,
338 dst_mt->stencil_mt, dst_x, dst_y, dst_z, dst_level,
339 src_width, src_height);
340 }
341 }
342
343 void
344 intelInitCopyImageFuncs(struct dd_function_table *functions)
345 {
346 functions->CopyImageSubData = intel_copy_image_sub_data;
347 }