e9076f9ac97e1103b360c0740086700d4ae13967
[mesa.git] / src / mesa / drivers / dri / intel / intel_blit.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <stdio.h>
30 #include <errno.h>
31
32 #include "mtypes.h"
33 #include "context.h"
34 #include "enums.h"
35
36 #include "intel_batchbuffer.h"
37 #include "intel_blit.h"
38 #include "intel_buffers.h"
39 #include "intel_context.h"
40 #include "intel_fbo.h"
41 #include "intel_reg.h"
42 #include "intel_regions.h"
43
44 #define FILE_DEBUG_FLAG DEBUG_BLIT
45
46 /**
47 * Copy the back color buffer to the front color buffer.
48 * Used for SwapBuffers().
49 */
50 void
51 intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
52 const drm_clip_rect_t * rect)
53 {
54
55 struct intel_context *intel;
56 const intelScreenPrivate *intelScreen;
57 int ret;
58
59 DBG("%s\n", __FUNCTION__);
60
61 assert(dPriv);
62
63 intel = intelScreenContext(dPriv->driScreenPriv->private);
64 if (!intel)
65 return;
66
67 intelScreen = intel->intelScreen;
68
69 /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets
70 * should work regardless.
71 */
72 LOCK_HARDWARE(intel);
73
74 if (dPriv && dPriv->numClipRects) {
75 struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
76 struct intel_region *src, *dst;
77 int nbox = dPriv->numClipRects;
78 drm_clip_rect_t *pbox = dPriv->pClipRects;
79 int cpp;
80 int src_pitch, dst_pitch;
81 unsigned short src_x, src_y;
82 int BR13, CMD;
83 int i;
84
85 src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
86 dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
87
88 src_pitch = src->pitch * src->cpp;
89 dst_pitch = dst->pitch * dst->cpp;
90
91 cpp = src->cpp;
92
93 ASSERT(intel_fb);
94 ASSERT(intel_fb->Base.Name == 0); /* Not a user-created FBO */
95 ASSERT(src);
96 ASSERT(dst);
97 ASSERT(src->cpp == dst->cpp);
98
99 if (cpp == 2) {
100 BR13 = (0xCC << 16) | (1 << 24);
101 CMD = XY_SRC_COPY_BLT_CMD;
102 }
103 else {
104 BR13 = (0xCC << 16) | (1 << 24) | (1 << 25);
105 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
106 }
107
108 #ifndef I915
109 if (src->tiling != I915_TILING_NONE) {
110 CMD |= XY_SRC_TILED;
111 src_pitch /= 4;
112 }
113 if (dst->tiling != I915_TILING_NONE) {
114 CMD |= XY_DST_TILED;
115 dst_pitch /= 4;
116 }
117 #endif
118 /* do space/cliprects check before going any further */
119 intel_batchbuffer_require_space(intel->batch, 8 * 4, REFERENCES_CLIPRECTS);
120 again:
121 ret = dri_bufmgr_check_aperture_space(dst->buffer);
122 ret |= dri_bufmgr_check_aperture_space(src->buffer);
123
124 if (ret) {
125 intel_batchbuffer_flush(intel->batch);
126 goto again;
127 }
128
129 for (i = 0; i < nbox; i++, pbox++) {
130 drm_clip_rect_t box = *pbox;
131
132 if (rect) {
133 if (!intel_intersect_cliprects(&box, &box, rect))
134 continue;
135 }
136
137 if (box.x1 >= box.x2 ||
138 box.y1 >= box.y2)
139 continue;
140
141 assert(box.x1 < box.x2);
142 assert(box.y1 < box.y2);
143 src_x = box.x1 - dPriv->x + dPriv->backX;
144 src_y = box.y1 - dPriv->y + dPriv->backY;
145
146 BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
147 OUT_BATCH(CMD);
148 OUT_BATCH(BR13 | dst_pitch);
149 OUT_BATCH((box.y1 << 16) | box.x1);
150 OUT_BATCH((box.y2 << 16) | box.x2);
151
152 OUT_RELOC(dst->buffer,
153 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
154 0);
155 OUT_BATCH((src_y << 16) | src_x);
156 OUT_BATCH(src_pitch);
157 OUT_RELOC(src->buffer,
158 I915_GEM_DOMAIN_RENDER, 0,
159 0);
160 ADVANCE_BATCH();
161 }
162
163 /* Flush the rendering and the batch so that the results all land on the
164 * screen in a timely fashion.
165 */
166 intel_batchbuffer_emit_mi_flush(intel->batch);
167 intel_batchbuffer_flush(intel->batch);
168 }
169
170 UNLOCK_HARDWARE(intel);
171 }
172
173
174
175
176 void
177 intelEmitFillBlit(struct intel_context *intel,
178 GLuint cpp,
179 GLshort dst_pitch,
180 dri_bo *dst_buffer,
181 GLuint dst_offset,
182 uint32_t dst_tiling,
183 GLshort x, GLshort y,
184 GLshort w, GLshort h,
185 GLuint color)
186 {
187 GLuint BR13, CMD;
188 BATCH_LOCALS;
189
190 dst_pitch *= cpp;
191
192 switch (cpp) {
193 case 1:
194 case 2:
195 case 3:
196 BR13 = (0xF0 << 16) | (1 << 24);
197 CMD = XY_COLOR_BLT_CMD;
198 break;
199 case 4:
200 BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25);
201 CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
202 break;
203 default:
204 return;
205 }
206 #ifndef I915
207 if (dst_tiling != I915_TILING_NONE) {
208 CMD |= XY_DST_TILED;
209 dst_pitch /= 4;
210 }
211 #endif
212
213 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
214 __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
215
216 assert(w > 0);
217 assert(h > 0);
218
219 BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
220 OUT_BATCH(CMD);
221 OUT_BATCH(BR13 | dst_pitch);
222 OUT_BATCH((y << 16) | x);
223 OUT_BATCH(((y + h) << 16) | (x + w));
224 OUT_RELOC(dst_buffer,
225 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
226 dst_offset);
227 OUT_BATCH(color);
228 ADVANCE_BATCH();
229 }
230
231 static GLuint translate_raster_op(GLenum logicop)
232 {
233 switch(logicop) {
234 case GL_CLEAR: return 0x00;
235 case GL_AND: return 0x88;
236 case GL_AND_REVERSE: return 0x44;
237 case GL_COPY: return 0xCC;
238 case GL_AND_INVERTED: return 0x22;
239 case GL_NOOP: return 0xAA;
240 case GL_XOR: return 0x66;
241 case GL_OR: return 0xEE;
242 case GL_NOR: return 0x11;
243 case GL_EQUIV: return 0x99;
244 case GL_INVERT: return 0x55;
245 case GL_OR_REVERSE: return 0xDD;
246 case GL_COPY_INVERTED: return 0x33;
247 case GL_OR_INVERTED: return 0xBB;
248 case GL_NAND: return 0x77;
249 case GL_SET: return 0xFF;
250 default: return 0;
251 }
252 }
253
254
255 /* Copy BitBlt
256 */
257 void
258 intelEmitCopyBlit(struct intel_context *intel,
259 GLuint cpp,
260 GLshort src_pitch,
261 dri_bo *src_buffer,
262 GLuint src_offset,
263 uint32_t src_tiling,
264 GLshort dst_pitch,
265 dri_bo *dst_buffer,
266 GLuint dst_offset,
267 uint32_t dst_tiling,
268 GLshort src_x, GLshort src_y,
269 GLshort dst_x, GLshort dst_y,
270 GLshort w, GLshort h,
271 GLenum logic_op)
272 {
273 GLuint CMD, BR13;
274 int dst_y2 = dst_y + h;
275 int dst_x2 = dst_x + w;
276 int ret;
277 BATCH_LOCALS;
278
279 /* do space/cliprects check before going any further */
280 intel_batchbuffer_require_space(intel->batch, 8 * 4, NO_LOOP_CLIPRECTS);
281 again:
282 ret = dri_bufmgr_check_aperture_space(dst_buffer);
283 ret |= dri_bufmgr_check_aperture_space(src_buffer);
284 if (ret) {
285 intel_batchbuffer_flush(intel->batch);
286 goto again;
287 }
288
289 DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
290 __FUNCTION__,
291 src_buffer, src_pitch, src_offset, src_x, src_y,
292 dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
293
294 src_pitch *= cpp;
295 dst_pitch *= cpp;
296
297 BR13 = translate_raster_op(logic_op) << 16;
298
299 switch (cpp) {
300 case 1:
301 case 2:
302 case 3:
303 BR13 |= (1 << 24);
304 CMD = XY_SRC_COPY_BLT_CMD;
305 break;
306 case 4:
307 BR13 |= (1 << 24) | (1 << 25);
308 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
309 break;
310 default:
311 return;
312 }
313
314 #ifndef I915
315 if (dst_tiling != I915_TILING_NONE) {
316 CMD |= XY_DST_TILED;
317 dst_pitch /= 4;
318 }
319 if (src_tiling != I915_TILING_NONE) {
320 CMD |= XY_SRC_TILED;
321 src_pitch /= 4;
322 }
323 #endif
324
325 if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
326 return;
327 }
328
329 /* Initial y values don't seem to work with negative pitches. If
330 * we adjust the offsets manually (below), it seems to work fine.
331 *
332 * On the other hand, if we always adjust, the hardware doesn't
333 * know which blit directions to use, so overlapping copypixels get
334 * the wrong result.
335 */
336 if (dst_pitch > 0 && src_pitch > 0) {
337 assert(dst_x < dst_x2);
338 assert(dst_y < dst_y2);
339
340 BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
341 OUT_BATCH(CMD);
342 OUT_BATCH(BR13 | dst_pitch);
343 OUT_BATCH((dst_y << 16) | dst_x);
344 OUT_BATCH((dst_y2 << 16) | dst_x2);
345 OUT_RELOC(dst_buffer,
346 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
347 dst_offset);
348 OUT_BATCH((src_y << 16) | src_x);
349 OUT_BATCH(src_pitch);
350 OUT_RELOC(src_buffer,
351 I915_GEM_DOMAIN_RENDER, 0,
352 src_offset);
353 ADVANCE_BATCH();
354 }
355 else {
356 assert(dst_x < dst_x2);
357 assert(h > 0);
358
359 BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
360 OUT_BATCH(CMD);
361 OUT_BATCH(BR13 | ((uint16_t)dst_pitch));
362 OUT_BATCH((0 << 16) | dst_x);
363 OUT_BATCH((h << 16) | dst_x2);
364 OUT_RELOC(dst_buffer,
365 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
366 dst_offset + dst_y * dst_pitch);
367 OUT_BATCH((0 << 16) | src_x);
368 OUT_BATCH(src_pitch);
369 OUT_RELOC(src_buffer,
370 I915_GEM_DOMAIN_RENDER, 0,
371 src_offset + src_y * src_pitch);
372 ADVANCE_BATCH();
373 }
374 intel_batchbuffer_emit_mi_flush(intel->batch);
375 }
376
377
378 /**
379 * Use blitting to clear the renderbuffers named by 'flags'.
380 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
381 * since that might include software renderbuffers or renderbuffers
382 * which we're clearing with triangles.
383 * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear
384 */
385 void
386 intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
387 {
388 struct intel_context *intel = intel_context(ctx);
389 struct gl_framebuffer *fb = ctx->DrawBuffer;
390 GLuint clear_depth;
391 GLbitfield skipBuffers = 0;
392 BATCH_LOCALS;
393
394 /*
395 * Compute values for clearing the buffers.
396 */
397 clear_depth = 0;
398 if (mask & BUFFER_BIT_DEPTH) {
399 clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
400 }
401 if (mask & BUFFER_BIT_STENCIL) {
402 clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
403 }
404
405 /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in
406 * the loop below.
407 */
408 if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) {
409 skipBuffers = BUFFER_BIT_STENCIL;
410 }
411
412 /* XXX Move this flush/lock into the following conditional? */
413 intelFlush(&intel->ctx);
414 LOCK_HARDWARE(intel);
415
416 if (intel->numClipRects) {
417 GLint cx, cy, cw, ch;
418 drm_clip_rect_t clear;
419 int i;
420
421 /* Get clear bounds after locking */
422 cx = fb->_Xmin;
423 cy = fb->_Ymin;
424 cw = fb->_Xmax - cx;
425 ch = fb->_Ymax - cy;
426
427 if (fb->Name == 0) {
428 /* clearing a window */
429
430 /* flip top to bottom */
431 clear.x1 = cx + intel->drawX;
432 clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
433 clear.x2 = clear.x1 + cw;
434 clear.y2 = clear.y1 + ch;
435 }
436 else {
437 /* clearing FBO */
438 assert(intel->numClipRects == 1);
439 assert(intel->pClipRects == &intel->fboRect);
440 clear.x1 = cx;
441 clear.y1 = cy;
442 clear.x2 = clear.x1 + cw;
443 clear.y2 = clear.y1 + ch;
444 /* no change to mask */
445 }
446
447 for (i = 0; i < intel->numClipRects; i++) {
448 const drm_clip_rect_t *box = &intel->pClipRects[i];
449 drm_clip_rect_t b;
450 GLuint buf;
451 GLuint clearMask = mask; /* use copy, since we modify it below */
452 GLboolean all = (cw == fb->Width && ch == fb->Height);
453
454 if (!all) {
455 intel_intersect_cliprects(&b, &clear, box);
456 }
457 else {
458 b = *box;
459 }
460
461 if (b.x1 >= b.x2 || b.y1 >= b.y2)
462 continue;
463
464 if (0)
465 _mesa_printf("clear %d,%d..%d,%d, mask %x\n",
466 b.x1, b.y1, b.x2, b.y2, mask);
467
468 /* Loop over all renderbuffers */
469 for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) {
470 const GLbitfield bufBit = 1 << buf;
471 if ((clearMask & bufBit) && !(bufBit & skipBuffers)) {
472 /* OK, clear this renderbuffer */
473 struct intel_region *irb_region =
474 intel_get_rb_region(fb, buf);
475 dri_bo *write_buffer =
476 intel_region_buffer(intel, irb_region,
477 all ? INTEL_WRITE_FULL :
478 INTEL_WRITE_PART);
479
480 GLuint clearVal;
481 GLint pitch, cpp;
482 GLuint BR13, CMD;
483
484 ASSERT(irb_region);
485
486 pitch = irb_region->pitch;
487 cpp = irb_region->cpp;
488
489 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
490 __FUNCTION__,
491 irb_region->buffer, (pitch * cpp),
492 irb_region->draw_offset,
493 b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1);
494
495 BR13 = 0xf0 << 16;
496 CMD = XY_COLOR_BLT_CMD;
497
498 /* Setup the blit command */
499 if (cpp == 4) {
500 BR13 |= (1 << 24) | (1 << 25);
501 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
502 if (clearMask & BUFFER_BIT_DEPTH)
503 CMD |= XY_BLT_WRITE_RGB;
504 if (clearMask & BUFFER_BIT_STENCIL)
505 CMD |= XY_BLT_WRITE_ALPHA;
506 }
507 else {
508 /* clearing RGBA */
509 CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
510 }
511 }
512 else {
513 ASSERT(cpp == 2 || cpp == 0);
514 BR13 |= (1 << 24);
515 }
516
517 #ifndef I915
518 if (irb_region->tiling != I915_TILING_NONE) {
519 CMD |= XY_DST_TILED;
520 pitch /= 4;
521 }
522 #endif
523 BR13 |= (pitch * cpp);
524
525 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
526 clearVal = clear_depth;
527 }
528 else {
529 clearVal = (cpp == 4)
530 ? intel->ClearColor8888 : intel->ClearColor565;
531 }
532 /*
533 _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n",
534 buf, irb->Base.Name);
535 */
536 intel_wait_flips(intel);
537
538 assert(b.x1 < b.x2);
539 assert(b.y1 < b.y2);
540
541 BEGIN_BATCH(6, REFERENCES_CLIPRECTS);
542 OUT_BATCH(CMD);
543 OUT_BATCH(BR13);
544 OUT_BATCH((b.y1 << 16) | b.x1);
545 OUT_BATCH((b.y2 << 16) | b.x2);
546 OUT_RELOC(write_buffer,
547 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
548 irb_region->draw_offset);
549 OUT_BATCH(clearVal);
550 ADVANCE_BATCH();
551 clearMask &= ~bufBit; /* turn off bit, for faster loop exit */
552 }
553 }
554 }
555 intel_batchbuffer_emit_mi_flush(intel->batch);
556 }
557
558 UNLOCK_HARDWARE(intel);
559 }
560
561 void
562 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
563 GLuint cpp,
564 GLubyte *src_bits, GLuint src_size,
565 GLuint fg_color,
566 GLshort dst_pitch,
567 dri_bo *dst_buffer,
568 GLuint dst_offset,
569 uint32_t dst_tiling,
570 GLshort x, GLshort y,
571 GLshort w, GLshort h,
572 GLenum logic_op)
573 {
574 int dwords = ALIGN(src_size, 8) / 4;
575 uint32_t opcode, br13, blit_cmd;
576
577 assert( logic_op - GL_CLEAR >= 0 );
578 assert( logic_op - GL_CLEAR < 0x10 );
579
580 if (w < 0 || h < 0)
581 return;
582
583 dst_pitch *= cpp;
584
585 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
586 __FUNCTION__,
587 dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
588
589 intel_batchbuffer_require_space( intel->batch,
590 (8 * 4) +
591 (3 * 4) +
592 dwords,
593 REFERENCES_CLIPRECTS );
594
595 opcode = XY_SETUP_BLT_CMD;
596 if (cpp == 4)
597 opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
598 #ifndef I915
599 if (dst_tiling != I915_TILING_NONE) {
600 opcode |= XY_DST_TILED;
601 dst_pitch /= 4;
602 }
603 #endif
604
605 br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
606 if (cpp == 2)
607 br13 |= BR13_565;
608 else
609 br13 |= BR13_8888;
610
611 blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
612 if (dst_tiling != I915_TILING_NONE)
613 blit_cmd |= XY_DST_TILED;
614
615 BEGIN_BATCH(8 + 3, REFERENCES_CLIPRECTS);
616 OUT_BATCH(opcode);
617 OUT_BATCH(br13);
618 OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
619 OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
620 OUT_RELOC(dst_buffer,
621 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
622 dst_offset);
623 OUT_BATCH(0); /* bg */
624 OUT_BATCH(fg_color); /* fg */
625 OUT_BATCH(0); /* pattern base addr */
626
627 OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
628 OUT_BATCH((y << 16) | x);
629 OUT_BATCH(((y + h) << 16) | (x + w));
630 ADVANCE_BATCH();
631
632 intel_batchbuffer_data( intel->batch,
633 src_bits,
634 dwords * 4,
635 REFERENCES_CLIPRECTS );
636
637 intel_batchbuffer_emit_mi_flush(intel->batch);
638 }