3c1695950eb733d964c15cb6384aec7c68efb39a
[mesa.git] / src / gallium / drivers / ilo / ilo_blit.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 2012-2013 LunarG, Inc.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Chia-I Wu <olv@lunarg.com>
26 */
27
28 #include "util/u_pack_color.h"
29 #include "util/u_surface.h"
30 #include "intel_reg.h"
31
32 #include "ilo_blitter.h"
33 #include "ilo_context.h"
34 #include "ilo_cp.h"
35 #include "ilo_resource.h"
36 #include "ilo_screen.h"
37 #include "ilo_blit.h"
38
/*
 * Size limits of a single BLT operation.
 *
 * From the Sandy Bridge PRM, volume 1 part 5, page 7:
 *
 *     "The BLT engine is capable of transferring very large quantities of
 *      graphics data. Any graphics data read from and written to the
 *      destination is permitted to represent a number of pixels that occupies
 *      up to 65,536 scan lines and up to 32,768 bytes per scan line at the
 *      destination. The maximum number of pixels that may be represented per
 *      scan line's worth of graphics data depends on the color depth."
 */
static const int gen6_max_scanlines = 65536;
static const int gen6_max_bytes_per_scanline = 32768;
51
52 static void
53 ilo_blit_own_blt_ring(struct ilo_context *ilo)
54 {
55 ilo_cp_set_ring(ilo->cp, ILO_CP_RING_BLT);
56 ilo_cp_set_owner(ilo->cp, NULL, 0);
57 }
58
59 static void
60 gen6_MI_FLUSH_DW(struct ilo_context *ilo)
61 {
62 const uint8_t cmd_len = 4;
63 struct ilo_cp *cp = ilo->cp;
64
65 ilo_cp_begin(cp, cmd_len);
66 ilo_cp_write(cp, MI_FLUSH_DW | (cmd_len - 2));
67 ilo_cp_write(cp, 0);
68 ilo_cp_write(cp, 0);
69 ilo_cp_write(cp, 0);
70 ilo_cp_end(cp);
71 }
72
73 static void
74 gen6_MI_LOAD_REGISTER_IMM(struct ilo_context *ilo, uint32_t reg, uint32_t val)
75 {
76 const uint8_t cmd_len = 3;
77 struct ilo_cp *cp = ilo->cp;
78
79 ilo_cp_begin(cp, cmd_len);
80 ilo_cp_write(cp, MI_LOAD_REGISTER_IMM | (cmd_len - 2));
81 ilo_cp_write(cp, reg);
82 ilo_cp_write(cp, val);
83 ilo_cp_end(cp);
84 }
85
86 static void
87 gen6_XY_COLOR_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo,
88 uint32_t dst_offset, int16_t dst_pitch,
89 enum intel_tiling_mode dst_tiling,
90 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
91 uint32_t color,
92 uint8_t rop, int cpp, bool write_alpha)
93 {
94 const uint8_t cmd_len = 6;
95 struct ilo_cp *cp = ilo->cp;
96 int dst_align, dst_pitch_shift;
97 uint32_t dw0, dw1;
98
99 dw0 = XY_COLOR_BLT_CMD | (cmd_len - 2);
100
101 if (dst_tiling == INTEL_TILING_NONE) {
102 dst_align = 4;
103 dst_pitch_shift = 0;
104 }
105 else {
106 dw0 |= XY_DST_TILED;
107
108 dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
109 /* in dwords when tiled */
110 dst_pitch_shift = 2;
111 }
112
113 assert(cpp == 4 || cpp == 2 || cpp == 1);
114 assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
115 assert(y2 - y1 < gen6_max_scanlines);
116 assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);
117
118 dw1 = rop << 16 |
119 dst_pitch >> dst_pitch_shift;
120
121 switch (cpp) {
122 case 4:
123 dw0 |= XY_BLT_WRITE_RGB;
124 if (write_alpha)
125 dw0 |= XY_BLT_WRITE_ALPHA;
126
127 dw1 |= BR13_8888;
128 break;
129 case 2:
130 dw1 |= BR13_565;
131 break;
132 case 1:
133 dw1 |= BR13_8;
134 break;
135 }
136
137 ilo_cp_begin(cp, cmd_len);
138 ilo_cp_write(cp, dw0);
139 ilo_cp_write(cp, dw1);
140 ilo_cp_write(cp, y1 << 16 | x1);
141 ilo_cp_write(cp, y2 << 16 | x2);
142 ilo_cp_write_bo(cp, dst_offset, dst_bo,
143 INTEL_DOMAIN_RENDER, INTEL_DOMAIN_RENDER);
144 ilo_cp_write(cp, color);
145 ilo_cp_end(cp);
146 }
147
148 static void
149 gen6_SRC_COPY_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo,
150 uint32_t dst_offset, int16_t dst_pitch,
151 uint16_t width, uint16_t height,
152 struct intel_bo *src_bo,
153 uint32_t src_offset, int16_t src_pitch,
154 uint8_t rop, int cpp, bool write_alpha, bool dir_rtl)
155 {
156 const uint8_t cmd_len = 6;
157 struct ilo_cp *cp = ilo->cp;
158 uint32_t dw0, dw1;
159
160 assert(cpp == 4 || cpp == 2 || cpp == 1);
161 assert(width < gen6_max_bytes_per_scanline);
162 assert(height < gen6_max_scanlines);
163 /* offsets are naturally aligned and pitches are dword-aligned */
164 assert(dst_offset % cpp == 0 && dst_pitch % 4 == 0);
165 assert(src_offset % cpp == 0 && src_pitch % 4 == 0);
166
167 #ifndef SRC_COPY_BLT_CMD
168 #define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22))
169 #endif
170 dw0 = SRC_COPY_BLT_CMD | (cmd_len - 2);
171 dw1 = rop << 16 | dst_pitch;
172
173 if (dir_rtl)
174 dw1 |= 1 << 30;
175
176 switch (cpp) {
177 case 4:
178 dw0 |= XY_BLT_WRITE_RGB;
179 if (write_alpha)
180 dw0 |= XY_BLT_WRITE_ALPHA;
181
182 dw1 |= BR13_8888;
183 break;
184 case 2:
185 dw1 |= BR13_565;
186 break;
187 case 1:
188 dw1 |= BR13_8;
189 break;
190 }
191
192 ilo_cp_begin(cp, cmd_len);
193 ilo_cp_write(cp, dw0);
194 ilo_cp_write(cp, dw1);
195 ilo_cp_write(cp, height << 16 | width);
196 ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
197 INTEL_DOMAIN_RENDER);
198 ilo_cp_write(cp, src_pitch);
199 ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
200 ilo_cp_end(cp);
201 }
202
203 static void
204 gen6_XY_SRC_COPY_BLT(struct ilo_context *ilo, struct intel_bo *dst_bo,
205 uint32_t dst_offset, int16_t dst_pitch,
206 enum intel_tiling_mode dst_tiling,
207 int16_t x1, int16_t y1, int16_t x2, int16_t y2,
208 struct intel_bo *src_bo,
209 uint32_t src_offset, int16_t src_pitch,
210 enum intel_tiling_mode src_tiling,
211 int16_t src_x, int16_t src_y,
212 uint8_t rop, int cpp, bool write_alpha)
213 {
214 const uint8_t cmd_len = 8;
215 struct ilo_cp *cp = ilo->cp;
216 int dst_align, dst_pitch_shift;
217 int src_align, src_pitch_shift;
218 uint32_t dw0, dw1;
219
220 dw0 = XY_SRC_COPY_BLT_CMD | (cmd_len - 2);
221
222 if (dst_tiling == INTEL_TILING_NONE) {
223 dst_align = 4;
224 dst_pitch_shift = 0;
225 }
226 else {
227 dw0 |= XY_DST_TILED;
228
229 dst_align = (dst_tiling == INTEL_TILING_Y) ? 128 : 512;
230 /* in dwords when tiled */
231 dst_pitch_shift = 2;
232 }
233
234 if (src_tiling == INTEL_TILING_NONE) {
235 src_align = 4;
236 src_pitch_shift = 0;
237 }
238 else {
239 dw0 |= XY_SRC_TILED;
240
241 src_align = (src_tiling == INTEL_TILING_Y) ? 128 : 512;
242 /* in dwords when tiled */
243 src_pitch_shift = 2;
244 }
245
246 assert(cpp == 4 || cpp == 2 || cpp == 1);
247 assert((x2 - x1) * cpp < gen6_max_bytes_per_scanline);
248 assert(y2 - y1 < gen6_max_scanlines);
249 assert(dst_offset % dst_align == 0 && dst_pitch % dst_align == 0);
250 assert(src_offset % src_align == 0 && src_pitch % src_align == 0);
251
252 dw1 = rop << 16 |
253 dst_pitch >> dst_pitch_shift;
254
255 switch (cpp) {
256 case 4:
257 dw0 |= XY_BLT_WRITE_RGB;
258 if (write_alpha)
259 dw0 |= XY_BLT_WRITE_ALPHA;
260
261 dw1 |= BR13_8888;
262 break;
263 case 2:
264 dw1 |= BR13_565;
265 break;
266 case 1:
267 dw1 |= BR13_8;
268 break;
269 }
270
271 ilo_cp_begin(cp, cmd_len);
272 ilo_cp_write(cp, dw0);
273 ilo_cp_write(cp, dw1);
274 ilo_cp_write(cp, y1 << 16 | x1);
275 ilo_cp_write(cp, y2 << 16 | x2);
276 ilo_cp_write_bo(cp, dst_offset, dst_bo, INTEL_DOMAIN_RENDER,
277 INTEL_DOMAIN_RENDER);
278 ilo_cp_write(cp, src_y << 16 | src_x);
279 ilo_cp_write(cp, src_pitch >> src_pitch_shift);
280 ilo_cp_write_bo(cp, src_offset, src_bo, INTEL_DOMAIN_RENDER, 0);
281 ilo_cp_end(cp);
282 }
283
284 static bool
285 tex_copy_region(struct ilo_context *ilo,
286 struct ilo_texture *dst,
287 unsigned dst_level,
288 unsigned dst_x, unsigned dst_y, unsigned dst_z,
289 struct ilo_texture *src,
290 unsigned src_level,
291 const struct pipe_box *src_box)
292 {
293 const struct util_format_description *desc =
294 util_format_description(dst->bo_format);
295 const unsigned max_extent = 32767; /* INT16_MAX */
296 const uint8_t rop = 0xcc; /* SRCCOPY */
297 struct intel_bo *aper_check[3];
298 uint32_t swctrl;
299 int cpp, xscale, slice;
300
301 /* no W-tiling support */
302 if (dst->separate_s8 || src->separate_s8)
303 return false;
304
305 if (dst->bo_stride > max_extent || src->bo_stride > max_extent)
306 return false;
307
308 cpp = desc->block.bits / 8;
309 xscale = 1;
310
311 /* accommodate for larger cpp */
312 if (cpp > 4) {
313 if (cpp % 2 == 1)
314 return false;
315
316 cpp = (cpp % 4 == 0) ? 4 : 2;
317 xscale = (desc->block.bits / 8) / cpp;
318 }
319
320 ilo_blit_own_blt_ring(ilo);
321
322 /* make room if necessary */
323 aper_check[0] = ilo->cp->bo;
324 aper_check[1] = dst->bo;
325 aper_check[2] = src->bo;
326 if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, 3))
327 ilo_cp_flush(ilo->cp);
328
329 swctrl = 0x0;
330
331 if (dst->tiling == INTEL_TILING_Y) {
332 swctrl |= BCS_SWCTRL_DST_Y << 16 |
333 BCS_SWCTRL_DST_Y;
334 }
335
336 if (src->tiling == INTEL_TILING_Y) {
337 swctrl |= BCS_SWCTRL_SRC_Y << 16 |
338 BCS_SWCTRL_SRC_Y;
339 }
340
341 if (swctrl) {
342 /*
343 * Most clients expect BLT engine to be stateless. If we have to set
344 * BCS_SWCTRL to a non-default value, we have to set it back in the same
345 * batch buffer.
346 */
347 if (ilo_cp_space(ilo->cp) < (4 + 3) * 2 + src_box->depth * 8)
348 ilo_cp_flush(ilo->cp);
349
350 ilo_cp_assert_no_implicit_flush(ilo->cp, true);
351
352 /*
353 * From the Ivy Bridge PRM, volume 1 part 4, page 133:
354 *
355 * "SW is required to flush the HW before changing the polarity of
356 * this bit (Tile Y Destination/Source)."
357 */
358 gen6_MI_FLUSH_DW(ilo);
359 gen6_MI_LOAD_REGISTER_IMM(ilo, BCS_SWCTRL, swctrl);
360
361 swctrl &= ~(BCS_SWCTRL_DST_Y | BCS_SWCTRL_SRC_Y);
362 }
363
364 for (slice = 0; slice < src_box->depth; slice++) {
365 const struct ilo_texture_slice *dst_slice =
366 &dst->slice_offsets[dst_level][dst_z + slice];
367 const struct ilo_texture_slice *src_slice =
368 &src->slice_offsets[src_level][src_box->z + slice];
369 unsigned x1, y1, x2, y2, src_x, src_y;
370
371 x1 = (dst_slice->x + dst_x) * xscale;
372 y1 = dst_slice->y + dst_y;
373 x2 = (x1 + src_box->width) * xscale;
374 y2 = y1 + src_box->height;
375 src_x = (src_slice->x + src_box->x) * xscale;
376 src_y = src_slice->y + src_box->y;
377
378 x1 /= desc->block.width;
379 y1 /= desc->block.height;
380 x2 = (x2 + desc->block.width - 1) / desc->block.width;
381 y2 = (y2 + desc->block.height - 1) / desc->block.height;
382 src_x /= desc->block.width;
383 src_y /= desc->block.height;
384
385 if (x2 > max_extent || y2 > max_extent ||
386 src_x > max_extent || src_y > max_extent ||
387 (x2 - x1) * cpp > gen6_max_bytes_per_scanline)
388 break;
389
390 gen6_XY_SRC_COPY_BLT(ilo,
391 dst->bo, 0, dst->bo_stride, dst->tiling,
392 x1, y1, x2, y2,
393 src->bo, 0, src->bo_stride, src->tiling,
394 src_x, src_y, rop, cpp, true);
395 }
396
397 if (swctrl) {
398 gen6_MI_FLUSH_DW(ilo);
399 gen6_MI_LOAD_REGISTER_IMM(ilo, BCS_SWCTRL, swctrl);
400
401 ilo_cp_assert_no_implicit_flush(ilo->cp, false);
402 }
403
404 return (slice == src_box->depth);
405 }
406
407 static bool
408 buf_copy_region(struct ilo_context *ilo,
409 struct ilo_buffer *dst, unsigned dst_offset,
410 struct ilo_buffer *src, unsigned src_offset,
411 unsigned size)
412 {
413 const uint8_t rop = 0xcc; /* SRCCOPY */
414 unsigned offset = 0;
415 struct intel_bo *aper_check[3];
416
417 ilo_blit_own_blt_ring(ilo);
418
419 /* make room if necessary */
420 aper_check[0] = ilo->cp->bo;
421 aper_check[1] = dst->bo;
422 aper_check[2] = src->bo;
423 if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, 3))
424 ilo_cp_flush(ilo->cp);
425
426 while (size) {
427 unsigned width, height;
428 int16_t pitch;
429
430 width = size;
431 height = 1;
432 pitch = 0;
433
434 if (width > gen6_max_bytes_per_scanline) {
435 /* less than INT16_MAX and dword-aligned */
436 pitch = 32764;
437
438 width = pitch;
439 height = size / width;
440 if (height > gen6_max_scanlines)
441 height = gen6_max_scanlines;
442 }
443
444 gen6_SRC_COPY_BLT(ilo,
445 dst->bo, dst_offset + offset, pitch,
446 width, height,
447 src->bo, src_offset + offset, pitch,
448 rop, 1, true, false);
449
450 offset += pitch * height;
451 size -= width * height;
452 }
453
454 return true;
455 }
456
457 static void
458 ilo_resource_copy_region(struct pipe_context *pipe,
459 struct pipe_resource *dst,
460 unsigned dst_level,
461 unsigned dstx, unsigned dsty, unsigned dstz,
462 struct pipe_resource *src,
463 unsigned src_level,
464 const struct pipe_box *src_box)
465 {
466 bool success;
467
468 if (dst->target != PIPE_BUFFER && src->target != PIPE_BUFFER) {
469 success = tex_copy_region(ilo_context(pipe),
470 ilo_texture(dst), dst_level, dstx, dsty, dstz,
471 ilo_texture(src), src_level, src_box);
472 }
473 else if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
474 const unsigned dst_offset = dstx;
475 const unsigned src_offset = src_box->x;
476 const unsigned size = src_box->width;
477
478 assert(dst_level == 0 && dsty == 0 && dstz == 0);
479 assert(src_level == 0 &&
480 src_box->y == 0 &&
481 src_box->z == 0 &&
482 src_box->height == 1 &&
483 src_box->depth == 1);
484
485 success = buf_copy_region(ilo_context(pipe),
486 ilo_buffer(dst), dst_offset, ilo_buffer(src), src_offset, size);
487 }
488 else {
489 success = false;
490 }
491
492 if (!success) {
493 util_resource_copy_region(pipe, dst, dst_level,
494 dstx, dsty, dstz, src, src_level, src_box);
495 }
496 }
497
498 static bool
499 blitter_xy_color_blt(struct pipe_context *pipe,
500 struct pipe_resource *res,
501 int16_t x1, int16_t y1,
502 int16_t x2, int16_t y2,
503 uint32_t color)
504 {
505 struct ilo_context *ilo = ilo_context(pipe);
506 struct ilo_texture *tex = ilo_texture(res);
507 const int cpp = util_format_get_blocksize(tex->bo_format);
508 const uint8_t rop = 0xf0; /* PATCOPY */
509 struct intel_bo *aper_check[2];
510
511 /* how to support Y-tiling? */
512 if (tex->tiling == INTEL_TILING_Y)
513 return false;
514
515 /* nothing to clear */
516 if (x1 >= x2 || y1 >= y2)
517 return true;
518
519 ilo_blit_own_blt_ring(ilo);
520
521 /* make room if necessary */
522 aper_check[0] = ilo->cp->bo;
523 aper_check[1] = tex->bo;
524 if (intel_winsys_check_aperture_space(ilo->winsys, aper_check, 2))
525 ilo_cp_flush(ilo->cp);
526
527 gen6_XY_COLOR_BLT(ilo,
528 tex->bo, 0, tex->bo_stride, tex->tiling,
529 x1, y1, x2, y2, color, rop, cpp, true);
530
531 return true;
532 }
533
534 static void
535 ilo_clear(struct pipe_context *pipe,
536 unsigned buffers,
537 const union pipe_color_union *color,
538 double depth,
539 unsigned stencil)
540 {
541 struct ilo_context *ilo = ilo_context(pipe);
542
543 ilo_blitter_pipe_clear_fb(ilo->blitter, buffers, color, depth, stencil);
544 }
545
546 static void
547 ilo_clear_render_target(struct pipe_context *pipe,
548 struct pipe_surface *dst,
549 const union pipe_color_union *color,
550 unsigned dstx, unsigned dsty,
551 unsigned width, unsigned height)
552 {
553 struct ilo_context *ilo = ilo_context(pipe);
554 union util_color packed;
555
556 if (!width || !height || dstx >= dst->width || dsty >= dst->height)
557 return;
558
559 if (dstx + width > dst->width)
560 width = dst->width - dstx;
561 if (dsty + height > dst->height)
562 height = dst->height - dsty;
563
564 util_pack_color(color->f, dst->format, &packed);
565
566 /* try HW blit first */
567 if (blitter_xy_color_blt(pipe, dst->texture,
568 dstx, dsty,
569 dstx + width, dsty + height,
570 packed.ui))
571 return;
572
573 ilo_blitter_pipe_clear_rt(ilo->blitter,
574 dst, color, dstx, dsty, width, height);
575 }
576
577 static void
578 ilo_clear_depth_stencil(struct pipe_context *pipe,
579 struct pipe_surface *dst,
580 unsigned clear_flags,
581 double depth,
582 unsigned stencil,
583 unsigned dstx, unsigned dsty,
584 unsigned width, unsigned height)
585 {
586 struct ilo_context *ilo = ilo_context(pipe);
587
588 /*
589 * The PRM claims that HW blit supports Y-tiling since GEN6, but it does
590 * not tell us how to program it. Since depth buffers are always Y-tiled,
591 * HW blit will not work.
592 */
593
594 ilo_blitter_pipe_clear_zs(ilo->blitter,
595 dst, clear_flags, depth, stencil, dstx, dsty, width, height);
596 }
597
598 static void
599 ilo_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
600 {
601 struct ilo_context *ilo = ilo_context(pipe);
602
603 ilo_blitter_pipe_blit(ilo->blitter, info);
604 }
605
606 /**
607 * Initialize blit-related functions.
608 */
609 void
610 ilo_init_blit_functions(struct ilo_context *ilo)
611 {
612 ilo->base.resource_copy_region = ilo_resource_copy_region;
613 ilo->base.blit = ilo_blit;
614
615 ilo->base.clear = ilo_clear;
616 ilo->base.clear_render_target = ilo_clear_render_target;
617 ilo->base.clear_depth_stencil = ilo_clear_depth_stencil;
618 }