2 * Mesa 3-D graphics library
4 * Copyright (C) 2012-2013 LunarG, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22 * DEALINGS IN THE SOFTWARE.
25 * Chia-I Wu <olv@lunarg.com>
28 #include "util/u_pack_color.h"
29 #include "util/u_surface.h"
30 #include "intel_reg.h"
32 #include "ilo_blitter.h"
33 #include "ilo_context.h"
35 #include "ilo_resource.h"
36 #include "ilo_screen.h"
/*
 * From the Sandy Bridge PRM, volume 1 part 5, page 7:
 *
 *     "The BLT engine is capable of transferring very large quantities of
 *      graphics data. Any graphics data read from and written to the
 *      destination is permitted to represent a number of pixels that occupies
 *      up to 65,536 scan lines and up to 32,768 bytes per scan line at the
 *      destination. The maximum number of pixels that may be represented per
 *      scan line's worth of graphics data depends on the color depth."
 */

/* Per-scan-line byte limit quoted above; checked by the BLT emitters. */
static const int gen6_max_bytes_per_scanline = 32768;

/* Scan-line count limit quoted above; checked by the BLT emitters. */
static const int gen6_max_scanlines = 65536;
53 ilo_blit_own_blt_ring(struct ilo_context
*ilo
)
55 ilo_cp_set_ring(ilo
->cp
, ILO_CP_RING_BLT
);
56 ilo_cp_set_owner(ilo
->cp
, NULL
, 0);
60 gen6_MI_FLUSH_DW(struct ilo_context
*ilo
)
62 const uint8_t cmd_len
= 4;
63 struct ilo_cp
*cp
= ilo
->cp
;
65 ilo_cp_begin(cp
, cmd_len
);
66 ilo_cp_write(cp
, MI_FLUSH_DW
| (cmd_len
- 2));
74 gen6_MI_LOAD_REGISTER_IMM(struct ilo_context
*ilo
, uint32_t reg
, uint32_t val
)
76 const uint8_t cmd_len
= 3;
77 struct ilo_cp
*cp
= ilo
->cp
;
79 ilo_cp_begin(cp
, cmd_len
);
80 ilo_cp_write(cp
, MI_LOAD_REGISTER_IMM
| (cmd_len
- 2));
81 ilo_cp_write(cp
, reg
);
82 ilo_cp_write(cp
, val
);
87 gen6_XY_COLOR_BLT(struct ilo_context
*ilo
, struct intel_bo
*dst_bo
,
88 uint32_t dst_offset
, int16_t dst_pitch
,
89 enum intel_tiling_mode dst_tiling
,
90 int16_t x1
, int16_t y1
, int16_t x2
, int16_t y2
,
92 uint8_t rop
, int cpp
, bool write_alpha
)
94 const uint8_t cmd_len
= 6;
95 struct ilo_cp
*cp
= ilo
->cp
;
96 int dst_align
, dst_pitch_shift
;
99 dw0
= XY_COLOR_BLT_CMD
| (cmd_len
- 2);
101 if (dst_tiling
== INTEL_TILING_NONE
) {
108 dst_align
= (dst_tiling
== INTEL_TILING_Y
) ? 128 : 512;
109 /* in dwords when tiled */
113 assert(cpp
== 4 || cpp
== 2 || cpp
== 1);
114 assert((x2
- x1
) * cpp
< gen6_max_bytes_per_scanline
);
115 assert(y2
- y1
< gen6_max_scanlines
);
116 assert(dst_offset
% dst_align
== 0 && dst_pitch
% dst_align
== 0);
119 dst_pitch
>> dst_pitch_shift
;
123 dw0
|= XY_BLT_WRITE_RGB
;
125 dw0
|= XY_BLT_WRITE_ALPHA
;
137 ilo_cp_begin(cp
, cmd_len
);
138 ilo_cp_write(cp
, dw0
);
139 ilo_cp_write(cp
, dw1
);
140 ilo_cp_write(cp
, y1
<< 16 | x1
);
141 ilo_cp_write(cp
, y2
<< 16 | x2
);
142 ilo_cp_write_bo(cp
, dst_offset
, dst_bo
,
143 INTEL_DOMAIN_RENDER
, INTEL_DOMAIN_RENDER
);
144 ilo_cp_write(cp
, color
);
149 gen6_SRC_COPY_BLT(struct ilo_context
*ilo
, struct intel_bo
*dst_bo
,
150 uint32_t dst_offset
, int16_t dst_pitch
,
151 uint16_t width
, uint16_t height
,
152 struct intel_bo
*src_bo
,
153 uint32_t src_offset
, int16_t src_pitch
,
154 uint8_t rop
, int cpp
, bool write_alpha
, bool dir_rtl
)
156 const uint8_t cmd_len
= 6;
157 struct ilo_cp
*cp
= ilo
->cp
;
160 assert(cpp
== 4 || cpp
== 2 || cpp
== 1);
161 assert(width
< gen6_max_bytes_per_scanline
);
162 assert(height
< gen6_max_scanlines
);
163 /* offsets are naturally aligned and pitches are dword-aligned */
164 assert(dst_offset
% cpp
== 0 && dst_pitch
% 4 == 0);
165 assert(src_offset
% cpp
== 0 && src_pitch
% 4 == 0);
167 #ifndef SRC_COPY_BLT_CMD
168 #define SRC_COPY_BLT_CMD (CMD_2D | (0x43 << 22))
170 dw0
= SRC_COPY_BLT_CMD
| (cmd_len
- 2);
171 dw1
= rop
<< 16 | dst_pitch
;
178 dw0
|= XY_BLT_WRITE_RGB
;
180 dw0
|= XY_BLT_WRITE_ALPHA
;
192 ilo_cp_begin(cp
, cmd_len
);
193 ilo_cp_write(cp
, dw0
);
194 ilo_cp_write(cp
, dw1
);
195 ilo_cp_write(cp
, height
<< 16 | width
);
196 ilo_cp_write_bo(cp
, dst_offset
, dst_bo
, INTEL_DOMAIN_RENDER
,
197 INTEL_DOMAIN_RENDER
);
198 ilo_cp_write(cp
, src_pitch
);
199 ilo_cp_write_bo(cp
, src_offset
, src_bo
, INTEL_DOMAIN_RENDER
, 0);
204 gen6_XY_SRC_COPY_BLT(struct ilo_context
*ilo
, struct intel_bo
*dst_bo
,
205 uint32_t dst_offset
, int16_t dst_pitch
,
206 enum intel_tiling_mode dst_tiling
,
207 int16_t x1
, int16_t y1
, int16_t x2
, int16_t y2
,
208 struct intel_bo
*src_bo
,
209 uint32_t src_offset
, int16_t src_pitch
,
210 enum intel_tiling_mode src_tiling
,
211 int16_t src_x
, int16_t src_y
,
212 uint8_t rop
, int cpp
, bool write_alpha
)
214 const uint8_t cmd_len
= 8;
215 struct ilo_cp
*cp
= ilo
->cp
;
216 int dst_align
, dst_pitch_shift
;
217 int src_align
, src_pitch_shift
;
220 dw0
= XY_SRC_COPY_BLT_CMD
| (cmd_len
- 2);
222 if (dst_tiling
== INTEL_TILING_NONE
) {
229 dst_align
= (dst_tiling
== INTEL_TILING_Y
) ? 128 : 512;
230 /* in dwords when tiled */
234 if (src_tiling
== INTEL_TILING_NONE
) {
241 src_align
= (src_tiling
== INTEL_TILING_Y
) ? 128 : 512;
242 /* in dwords when tiled */
246 assert(cpp
== 4 || cpp
== 2 || cpp
== 1);
247 assert((x2
- x1
) * cpp
< gen6_max_bytes_per_scanline
);
248 assert(y2
- y1
< gen6_max_scanlines
);
249 assert(dst_offset
% dst_align
== 0 && dst_pitch
% dst_align
== 0);
250 assert(src_offset
% src_align
== 0 && src_pitch
% src_align
== 0);
253 dst_pitch
>> dst_pitch_shift
;
257 dw0
|= XY_BLT_WRITE_RGB
;
259 dw0
|= XY_BLT_WRITE_ALPHA
;
271 ilo_cp_begin(cp
, cmd_len
);
272 ilo_cp_write(cp
, dw0
);
273 ilo_cp_write(cp
, dw1
);
274 ilo_cp_write(cp
, y1
<< 16 | x1
);
275 ilo_cp_write(cp
, y2
<< 16 | x2
);
276 ilo_cp_write_bo(cp
, dst_offset
, dst_bo
, INTEL_DOMAIN_RENDER
,
277 INTEL_DOMAIN_RENDER
);
278 ilo_cp_write(cp
, src_y
<< 16 | src_x
);
279 ilo_cp_write(cp
, src_pitch
>> src_pitch_shift
);
280 ilo_cp_write_bo(cp
, src_offset
, src_bo
, INTEL_DOMAIN_RENDER
, 0);
285 tex_copy_region(struct ilo_context
*ilo
,
286 struct ilo_texture
*dst
,
288 unsigned dst_x
, unsigned dst_y
, unsigned dst_z
,
289 struct ilo_texture
*src
,
291 const struct pipe_box
*src_box
)
293 const struct util_format_description
*desc
=
294 util_format_description(dst
->bo_format
);
295 const unsigned max_extent
= 32767; /* INT16_MAX */
296 const uint8_t rop
= 0xcc; /* SRCCOPY */
297 struct intel_bo
*aper_check
[3];
299 int cpp
, xscale
, slice
;
301 /* no W-tiling support */
302 if (dst
->separate_s8
|| src
->separate_s8
)
305 if (dst
->bo_stride
> max_extent
|| src
->bo_stride
> max_extent
)
308 cpp
= desc
->block
.bits
/ 8;
311 /* accommodate for larger cpp */
316 cpp
= (cpp
% 4 == 0) ? 4 : 2;
317 xscale
= (desc
->block
.bits
/ 8) / cpp
;
320 ilo_blit_own_blt_ring(ilo
);
322 /* make room if necessary */
323 aper_check
[0] = ilo
->cp
->bo
;
324 aper_check
[1] = dst
->bo
;
325 aper_check
[2] = src
->bo
;
326 if (intel_winsys_check_aperture_space(ilo
->winsys
, aper_check
, 3))
327 ilo_cp_flush(ilo
->cp
);
331 if (dst
->tiling
== INTEL_TILING_Y
) {
332 swctrl
|= BCS_SWCTRL_DST_Y
<< 16 |
336 if (src
->tiling
== INTEL_TILING_Y
) {
337 swctrl
|= BCS_SWCTRL_SRC_Y
<< 16 |
343 * Most clients expect BLT engine to be stateless. If we have to set
344 * BCS_SWCTRL to a non-default value, we have to set it back in the same
347 if (ilo_cp_space(ilo
->cp
) < (4 + 3) * 2 + src_box
->depth
* 8)
348 ilo_cp_flush(ilo
->cp
);
350 ilo_cp_assert_no_implicit_flush(ilo
->cp
, true);
   /*
    * From the Ivy Bridge PRM, volume 1 part 4, page 133:
    *
    *     "SW is required to flush the HW before changing the polarity of
    *      this bit (Tile Y Destination/Source)."
    */
358 gen6_MI_FLUSH_DW(ilo
);
359 gen6_MI_LOAD_REGISTER_IMM(ilo
, BCS_SWCTRL
, swctrl
);
361 swctrl
&= ~(BCS_SWCTRL_DST_Y
| BCS_SWCTRL_SRC_Y
);
364 for (slice
= 0; slice
< src_box
->depth
; slice
++) {
365 const struct ilo_texture_slice
*dst_slice
=
366 &dst
->slice_offsets
[dst_level
][dst_z
+ slice
];
367 const struct ilo_texture_slice
*src_slice
=
368 &src
->slice_offsets
[src_level
][src_box
->z
+ slice
];
369 unsigned x1
, y1
, x2
, y2
, src_x
, src_y
;
371 x1
= (dst_slice
->x
+ dst_x
) * xscale
;
372 y1
= dst_slice
->y
+ dst_y
;
373 x2
= (x1
+ src_box
->width
) * xscale
;
374 y2
= y1
+ src_box
->height
;
375 src_x
= (src_slice
->x
+ src_box
->x
) * xscale
;
376 src_y
= src_slice
->y
+ src_box
->y
;
378 x1
/= desc
->block
.width
;
379 y1
/= desc
->block
.height
;
380 x2
= (x2
+ desc
->block
.width
- 1) / desc
->block
.width
;
381 y2
= (y2
+ desc
->block
.height
- 1) / desc
->block
.height
;
382 src_x
/= desc
->block
.width
;
383 src_y
/= desc
->block
.height
;
385 if (x2
> max_extent
|| y2
> max_extent
||
386 src_x
> max_extent
|| src_y
> max_extent
||
387 (x2
- x1
) * cpp
> gen6_max_bytes_per_scanline
)
390 gen6_XY_SRC_COPY_BLT(ilo
,
391 dst
->bo
, 0, dst
->bo_stride
, dst
->tiling
,
393 src
->bo
, 0, src
->bo_stride
, src
->tiling
,
394 src_x
, src_y
, rop
, cpp
, true);
398 gen6_MI_FLUSH_DW(ilo
);
399 gen6_MI_LOAD_REGISTER_IMM(ilo
, BCS_SWCTRL
, swctrl
);
401 ilo_cp_assert_no_implicit_flush(ilo
->cp
, false);
404 return (slice
== src_box
->depth
);
408 buf_copy_region(struct ilo_context
*ilo
,
409 struct ilo_buffer
*dst
, unsigned dst_offset
,
410 struct ilo_buffer
*src
, unsigned src_offset
,
413 const uint8_t rop
= 0xcc; /* SRCCOPY */
415 struct intel_bo
*aper_check
[3];
417 ilo_blit_own_blt_ring(ilo
);
419 /* make room if necessary */
420 aper_check
[0] = ilo
->cp
->bo
;
421 aper_check
[1] = dst
->bo
;
422 aper_check
[2] = src
->bo
;
423 if (intel_winsys_check_aperture_space(ilo
->winsys
, aper_check
, 3))
424 ilo_cp_flush(ilo
->cp
);
427 unsigned width
, height
;
434 if (width
> gen6_max_bytes_per_scanline
) {
435 /* less than INT16_MAX and dword-aligned */
439 height
= size
/ width
;
440 if (height
> gen6_max_scanlines
)
441 height
= gen6_max_scanlines
;
444 gen6_SRC_COPY_BLT(ilo
,
445 dst
->bo
, dst_offset
+ offset
, pitch
,
447 src
->bo
, src_offset
+ offset
, pitch
,
448 rop
, 1, true, false);
450 offset
+= pitch
* height
;
451 size
-= width
* height
;
458 ilo_resource_copy_region(struct pipe_context
*pipe
,
459 struct pipe_resource
*dst
,
461 unsigned dstx
, unsigned dsty
, unsigned dstz
,
462 struct pipe_resource
*src
,
464 const struct pipe_box
*src_box
)
468 if (dst
->target
!= PIPE_BUFFER
&& src
->target
!= PIPE_BUFFER
) {
469 success
= tex_copy_region(ilo_context(pipe
),
470 ilo_texture(dst
), dst_level
, dstx
, dsty
, dstz
,
471 ilo_texture(src
), src_level
, src_box
);
473 else if (dst
->target
== PIPE_BUFFER
&& src
->target
== PIPE_BUFFER
) {
474 const unsigned dst_offset
= dstx
;
475 const unsigned src_offset
= src_box
->x
;
476 const unsigned size
= src_box
->width
;
478 assert(dst_level
== 0 && dsty
== 0 && dstz
== 0);
479 assert(src_level
== 0 &&
482 src_box
->height
== 1 &&
483 src_box
->depth
== 1);
485 success
= buf_copy_region(ilo_context(pipe
),
486 ilo_buffer(dst
), dst_offset
, ilo_buffer(src
), src_offset
, size
);
493 util_resource_copy_region(pipe
, dst
, dst_level
,
494 dstx
, dsty
, dstz
, src
, src_level
, src_box
);
499 blitter_xy_color_blt(struct pipe_context
*pipe
,
500 struct pipe_resource
*res
,
501 int16_t x1
, int16_t y1
,
502 int16_t x2
, int16_t y2
,
505 struct ilo_context
*ilo
= ilo_context(pipe
);
506 struct ilo_texture
*tex
= ilo_texture(res
);
507 const int cpp
= util_format_get_blocksize(tex
->bo_format
);
508 const uint8_t rop
= 0xf0; /* PATCOPY */
509 struct intel_bo
*aper_check
[2];
511 /* how to support Y-tiling? */
512 if (tex
->tiling
== INTEL_TILING_Y
)
515 /* nothing to clear */
516 if (x1
>= x2
|| y1
>= y2
)
519 ilo_blit_own_blt_ring(ilo
);
521 /* make room if necessary */
522 aper_check
[0] = ilo
->cp
->bo
;
523 aper_check
[1] = tex
->bo
;
524 if (intel_winsys_check_aperture_space(ilo
->winsys
, aper_check
, 2))
525 ilo_cp_flush(ilo
->cp
);
527 gen6_XY_COLOR_BLT(ilo
,
528 tex
->bo
, 0, tex
->bo_stride
, tex
->tiling
,
529 x1
, y1
, x2
, y2
, color
, rop
, cpp
, true);
535 ilo_clear(struct pipe_context
*pipe
,
537 const union pipe_color_union
*color
,
541 struct ilo_context
*ilo
= ilo_context(pipe
);
543 ilo_blitter_pipe_clear_fb(ilo
->blitter
, buffers
, color
, depth
, stencil
);
547 ilo_clear_render_target(struct pipe_context
*pipe
,
548 struct pipe_surface
*dst
,
549 const union pipe_color_union
*color
,
550 unsigned dstx
, unsigned dsty
,
551 unsigned width
, unsigned height
)
553 struct ilo_context
*ilo
= ilo_context(pipe
);
554 union util_color packed
;
556 if (!width
|| !height
|| dstx
>= dst
->width
|| dsty
>= dst
->height
)
559 if (dstx
+ width
> dst
->width
)
560 width
= dst
->width
- dstx
;
561 if (dsty
+ height
> dst
->height
)
562 height
= dst
->height
- dsty
;
564 util_pack_color(color
->f
, dst
->format
, &packed
);
566 /* try HW blit first */
567 if (blitter_xy_color_blt(pipe
, dst
->texture
,
569 dstx
+ width
, dsty
+ height
,
573 ilo_blitter_pipe_clear_rt(ilo
->blitter
,
574 dst
, color
, dstx
, dsty
, width
, height
);
578 ilo_clear_depth_stencil(struct pipe_context
*pipe
,
579 struct pipe_surface
*dst
,
580 unsigned clear_flags
,
583 unsigned dstx
, unsigned dsty
,
584 unsigned width
, unsigned height
)
586 struct ilo_context
*ilo
= ilo_context(pipe
);
   /*
    * The PRM claims that HW blit supports Y-tiling since GEN6, but it does
    * not tell us how to program it.  Since depth buffers are always Y-tiled,
    * HW blit will not work.
    */
594 ilo_blitter_pipe_clear_zs(ilo
->blitter
,
595 dst
, clear_flags
, depth
, stencil
, dstx
, dsty
, width
, height
);
599 ilo_blit(struct pipe_context
*pipe
, const struct pipe_blit_info
*info
)
601 struct ilo_context
*ilo
= ilo_context(pipe
);
603 ilo_blitter_pipe_blit(ilo
->blitter
, info
);
607 * Initialize blit-related functions.
610 ilo_init_blit_functions(struct ilo_context
*ilo
)
612 ilo
->base
.resource_copy_region
= ilo_resource_copy_region
;
613 ilo
->base
.blit
= ilo_blit
;
615 ilo
->base
.clear
= ilo_clear
;
616 ilo
->base
.clear_render_target
= ilo_clear_render_target
;
617 ilo
->base
.clear_depth_stencil
= ilo_clear_depth_stencil
;