i965/copy_image: Divide the x offsets by block width when using the blitter
[mesa.git] / src / mesa / drivers / dri / i965 / intel_copy_image.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2014 Intel Corporation All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Jason Ekstrand <jason.ekstrand@intel.com>
26 */
27
28 #include "intel_tex.h"
29 #include "intel_blit.h"
30 #include "intel_mipmap_tree.h"
31 #include "main/formats.h"
32 #include "drivers/common/meta.h"
33
34 static bool
35 copy_image_with_blitter(struct brw_context *brw,
36 struct intel_mipmap_tree *src_mt, int src_level,
37 int src_x, int src_y, int src_z,
38 struct intel_mipmap_tree *dst_mt, int dst_level,
39 int dst_x, int dst_y, int dst_z,
40 int src_width, int src_height)
41 {
42 GLuint bw, bh;
43 uint32_t src_image_x, src_image_y, dst_image_x, dst_image_y;
44 int cpp;
45
46 /* The blitter doesn't understand multisampling at all. */
47 if (src_mt->num_samples > 0 || dst_mt->num_samples > 0)
48 return false;
49
50 /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
51 * Data Size Limitations):
52 *
53 * The BLT engine is capable of transferring very large quantities of
54 * graphics data. Any graphics data read from and written to the
55 * destination is permitted to represent a number of pixels that
56 * occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
57 * at the destination. The maximum number of pixels that may be
58 * represented per scan line’s worth of graphics data depends on the
59 * color depth.
60 *
61 * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
62 * 16-bit integer to represent buffer pitch, so it can only handle buffer
63 * pitches < 32k.
64 *
65 * As a result of these two limitations, we can only use the blitter to do
66 * this copy when the miptree's pitch is less than 32k.
67 */
68 if (src_mt->pitch >= 32768 ||
69 dst_mt->pitch >= 32768) {
70 perf_debug("Falling back due to >=32k pitch\n");
71 return false;
72 }
73
74 intel_miptree_get_image_offset(src_mt, src_level, src_z,
75 &src_image_x, &src_image_y);
76
77 if (_mesa_is_format_compressed(src_mt->format)) {
78 _mesa_get_format_block_size(src_mt->format, &bw, &bh);
79
80 assert(src_x % bw == 0);
81 assert(src_y % bh == 0);
82 assert(src_width % bw == 0);
83 assert(src_height % bh == 0);
84
85 src_x /= (int)bw;
86 src_y /= (int)bh;
87 src_width /= (int)bw;
88 src_height /= (int)bh;
89
90 /* Inside of the miptree, the x offsets are stored in pixels while
91 * the y offsets are stored in blocks. We need to scale just the x
92 * offset.
93 */
94 src_image_x /= bw;
95
96 cpp = _mesa_get_format_bytes(src_mt->format);
97 } else {
98 cpp = src_mt->cpp;
99 }
100 src_x += src_image_x;
101 src_y += src_image_y;
102
103 intel_miptree_get_image_offset(dst_mt, dst_level, dst_z,
104 &dst_image_x, &dst_image_y);
105
106 if (_mesa_is_format_compressed(dst_mt->format)) {
107 _mesa_get_format_block_size(dst_mt->format, &bw, &bh);
108
109 assert(dst_x % bw == 0);
110 assert(dst_y % bh == 0);
111
112 dst_x /= (int)bw;
113 dst_y /= (int)bh;
114
115 /* Inside of the miptree, the x offsets are stored in pixels while
116 * the y offsets are stored in blocks. We need to scale just the x
117 * offset.
118 */
119 dst_image_x /= bw;
120 }
121 dst_x += dst_image_x;
122 dst_y += dst_image_y;
123
124 return intelEmitCopyBlit(brw,
125 cpp,
126 src_mt->pitch,
127 src_mt->bo, src_mt->offset,
128 src_mt->tiling,
129 dst_mt->pitch,
130 dst_mt->bo, dst_mt->offset,
131 dst_mt->tiling,
132 src_x, src_y,
133 dst_x, dst_y,
134 src_width, src_height,
135 GL_COPY);
136 }
137
138 static void
139 copy_image_with_memcpy(struct brw_context *brw,
140 struct intel_mipmap_tree *src_mt, int src_level,
141 int src_x, int src_y, int src_z,
142 struct intel_mipmap_tree *dst_mt, int dst_level,
143 int dst_x, int dst_y, int dst_z,
144 int src_width, int src_height)
145 {
146 bool same_slice;
147 uint8_t *mapped, *src_mapped, *dst_mapped;
148 int src_stride, dst_stride, i, cpp;
149 int map_x1, map_y1, map_x2, map_y2;
150 GLuint src_bw, src_bh;
151
152 cpp = _mesa_get_format_bytes(src_mt->format);
153 _mesa_get_format_block_size(src_mt->format, &src_bw, &src_bh);
154
155 assert(src_width % src_bw == 0);
156 assert(src_height % src_bw == 0);
157 assert(src_x % src_bw == 0);
158 assert(src_y % src_bw == 0);
159
160 /* If we are on the same miptree, same level, and same slice, then
161 * intel_miptree_map won't let us map it twice. We have to do things a
162 * bit differently. In particular, we do a single map large enough for
163 * both portions and in read-write mode.
164 */
165 same_slice = src_mt == dst_mt && src_level == dst_level && src_z == dst_z;
166
167 if (same_slice) {
168 assert(dst_x % src_bw == 0);
169 assert(dst_y % src_bw == 0);
170
171 map_x1 = MIN2(src_x, dst_x);
172 map_y1 = MIN2(src_y, dst_y);
173 map_x2 = MAX2(src_x, dst_x) + src_width;
174 map_y2 = MAX2(src_y, dst_y) + src_height;
175
176 intel_miptree_map(brw, src_mt, src_level, src_z,
177 map_x1, map_y1, map_x2 - map_x1, map_y2 - map_y1,
178 GL_MAP_READ_BIT | GL_MAP_WRITE_BIT,
179 (void **)&mapped, &src_stride);
180
181 dst_stride = src_stride;
182
183 /* Set the offsets here so we don't have to think about while looping */
184 src_mapped = mapped + ((src_y - map_y1) / src_bh) * src_stride +
185 ((src_x - map_x1) / src_bw) * cpp;
186 dst_mapped = mapped + ((dst_y - map_y1) / src_bh) * dst_stride +
187 ((dst_x - map_x1) / src_bw) * cpp;
188 } else {
189 intel_miptree_map(brw, src_mt, src_level, src_z,
190 src_x, src_y, src_width, src_height,
191 GL_MAP_READ_BIT, (void **)&src_mapped, &src_stride);
192 intel_miptree_map(brw, dst_mt, dst_level, dst_z,
193 dst_x, dst_y, src_width, src_height,
194 GL_MAP_WRITE_BIT, (void **)&dst_mapped, &dst_stride);
195 }
196
197 src_width /= (int)src_bw;
198 src_height /= (int)src_bh;
199
200 for (i = 0; i < src_height; ++i) {
201 memcpy(dst_mapped, src_mapped, src_width * cpp);
202 src_mapped += src_stride;
203 dst_mapped += dst_stride;
204 }
205
206 if (same_slice) {
207 intel_miptree_unmap(brw, src_mt, src_level, src_z);
208 } else {
209 intel_miptree_unmap(brw, dst_mt, dst_level, dst_z);
210 intel_miptree_unmap(brw, src_mt, src_level, src_z);
211 }
212 }
213
214 static void
215 intel_copy_image_sub_data(struct gl_context *ctx,
216 struct gl_texture_image *src_image,
217 int src_x, int src_y, int src_z,
218 struct gl_texture_image *dst_image,
219 int dst_x, int dst_y, int dst_z,
220 int src_width, int src_height)
221 {
222 struct brw_context *brw = brw_context(ctx);
223 struct intel_texture_image *intel_src_image = intel_texture_image(src_image);
224 struct intel_texture_image *intel_dst_image = intel_texture_image(dst_image);
225
226 if (_mesa_meta_CopyImageSubData_uncompressed(ctx,
227 src_image, src_x, src_y, src_z,
228 dst_image, dst_x, dst_y, dst_z,
229 src_width, src_height)) {
230 return;
231 }
232
233 if (intel_src_image->mt->num_samples > 0 ||
234 intel_dst_image->mt->num_samples > 0) {
235 _mesa_problem(ctx, "Failed to copy multisampled texture with meta path\n");
236 return;
237 }
238
239 /* Cube maps actually have different images per face */
240 if (src_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
241 src_z = src_image->Face;
242 if (dst_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
243 dst_z = dst_image->Face;
244
245 /* We are now going to try and copy the texture using the blitter. If
246 * that fails, we will fall back mapping the texture and using memcpy.
247 * In either case, we need to do a full resolve.
248 */
249 intel_miptree_all_slices_resolve_hiz(brw, intel_src_image->mt);
250 intel_miptree_all_slices_resolve_depth(brw, intel_src_image->mt);
251 intel_miptree_resolve_color(brw, intel_src_image->mt);
252
253 intel_miptree_all_slices_resolve_hiz(brw, intel_dst_image->mt);
254 intel_miptree_all_slices_resolve_depth(brw, intel_dst_image->mt);
255 intel_miptree_resolve_color(brw, intel_dst_image->mt);
256
257 unsigned src_level = src_image->Level + src_image->TexObject->MinLevel;
258 unsigned dst_level = dst_image->Level + dst_image->TexObject->MinLevel;
259 if (copy_image_with_blitter(brw, intel_src_image->mt, src_level,
260 src_x, src_y, src_z,
261 intel_dst_image->mt, dst_level,
262 dst_x, dst_y, dst_z,
263 src_width, src_height))
264 return;
265
266 /* This is a worst-case scenario software fallback that maps the two
267 * textures and does a memcpy between them.
268 */
269 copy_image_with_memcpy(brw, intel_src_image->mt, src_level,
270 src_x, src_y, src_z,
271 intel_dst_image->mt, dst_level,
272 dst_x, dst_y, dst_z,
273 src_width, src_height);
274 }
275
276 void
277 intelInitCopyImageFuncs(struct dd_function_table *functions)
278 {
279 functions->CopyImageSubData = intel_copy_image_sub_data;
280 }