[intel] all flushing in intelEmitCopyBlit
[mesa.git] / src / mesa / drivers / dri / intel / intel_blit.c
1 /**************************************************************************
2 *
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <stdio.h>
30 #include <errno.h>
31
32 #include "mtypes.h"
33 #include "context.h"
34 #include "enums.h"
35
36 #include "intel_batchbuffer.h"
37 #include "intel_blit.h"
38 #include "intel_buffers.h"
39 #include "intel_context.h"
40 #include "intel_fbo.h"
41 #include "intel_reg.h"
42 #include "intel_regions.h"
43
44 #define FILE_DEBUG_FLAG DEBUG_BLIT
45
46 /**
47 * Copy the back color buffer to the front color buffer.
48 * Used for SwapBuffers().
49 */
50 void
51 intelCopyBuffer(const __DRIdrawablePrivate * dPriv,
52 const drm_clip_rect_t * rect)
53 {
54
55 struct intel_context *intel;
56 const intelScreenPrivate *intelScreen;
57 int ret;
58
59 DBG("%s\n", __FUNCTION__);
60
61 assert(dPriv);
62
63 intel = intelScreenContext(dPriv->driScreenPriv->private);
64 if (!intel)
65 return;
66
67 intelScreen = intel->intelScreen;
68
69 /* The LOCK_HARDWARE is required for the cliprects. Buffer offsets
70 * should work regardless.
71 */
72 LOCK_HARDWARE(intel);
73
74 if (dPriv && dPriv->numClipRects) {
75 struct intel_framebuffer *intel_fb = dPriv->driverPrivate;
76 struct intel_region *src, *dst;
77 int nbox = dPriv->numClipRects;
78 drm_clip_rect_t *pbox = dPriv->pClipRects;
79 int cpp;
80 int src_pitch, dst_pitch;
81 unsigned short src_x, src_y;
82 int BR13, CMD;
83 int i;
84
85 src = intel_get_rb_region(&intel_fb->Base, BUFFER_BACK_LEFT);
86 dst = intel_get_rb_region(&intel_fb->Base, BUFFER_FRONT_LEFT);
87
88 src_pitch = src->pitch * src->cpp;
89 dst_pitch = dst->pitch * dst->cpp;
90
91 cpp = src->cpp;
92
93 ASSERT(intel_fb);
94 ASSERT(intel_fb->Base.Name == 0); /* Not a user-created FBO */
95 ASSERT(src);
96 ASSERT(dst);
97 ASSERT(src->cpp == dst->cpp);
98
99 if (cpp == 2) {
100 BR13 = (0xCC << 16) | (1 << 24);
101 CMD = XY_SRC_COPY_BLT_CMD;
102 }
103 else {
104 BR13 = (0xCC << 16) | (1 << 24) | (1 << 25);
105 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
106 }
107
108 #ifndef I915
109 if (src->tiled) {
110 CMD |= XY_SRC_TILED;
111 src_pitch /= 4;
112 }
113 if (dst->tiled) {
114 CMD |= XY_DST_TILED;
115 dst_pitch /= 4;
116 }
117 #endif
118
119 again:
120 ret = dri_bufmgr_check_aperture_space(dst->buffer);
121 ret |= dri_bufmgr_check_aperture_space(src->buffer);
122
123 if (ret) {
124 intel_batchbuffer_flush(intel->batch);
125 goto again;
126 }
127
128 for (i = 0; i < nbox; i++, pbox++) {
129 drm_clip_rect_t box = *pbox;
130
131 if (rect) {
132 if (!intel_intersect_cliprects(&box, &box, rect))
133 continue;
134 }
135
136 if (box.x1 >= box.x2 ||
137 box.y1 >= box.y2)
138 continue;
139
140 assert(box.x1 < box.x2);
141 assert(box.y1 < box.y2);
142 src_x = box.x1 - dPriv->x + dPriv->backX;
143 src_y = box.y1 - dPriv->y + dPriv->backY;
144
145 BEGIN_BATCH(8, REFERENCES_CLIPRECTS);
146 OUT_BATCH(CMD);
147 OUT_BATCH(BR13 | dst_pitch);
148 OUT_BATCH((box.y1 << 16) | box.x1);
149 OUT_BATCH((box.y2 << 16) | box.x2);
150
151 OUT_RELOC(dst->buffer,
152 DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER,
153 0);
154 OUT_BATCH((src_y << 16) | src_x);
155 OUT_BATCH(src_pitch);
156 OUT_RELOC(src->buffer,
157 DRM_GEM_DOMAIN_I915_RENDER, 0,
158 0);
159 ADVANCE_BATCH();
160 }
161
162 /* Emit a flush so that, on systems where we don't have automatic flushing
163 * set (such as 965), the results all land on the screen in a timely
164 * fashion.
165 */
166 BEGIN_BATCH(1, IGNORE_CLIPRECTS);
167 OUT_BATCH(MI_FLUSH);
168 ADVANCE_BATCH();
169
170 intel_batchbuffer_flush(intel->batch);
171 }
172
173 UNLOCK_HARDWARE(intel);
174 }
175
176
177
178
179 void
180 intelEmitFillBlit(struct intel_context *intel,
181 GLuint cpp,
182 GLshort dst_pitch,
183 dri_bo *dst_buffer,
184 GLuint dst_offset,
185 GLboolean dst_tiled,
186 GLshort x, GLshort y,
187 GLshort w, GLshort h,
188 GLuint color)
189 {
190 GLuint BR13, CMD;
191 BATCH_LOCALS;
192
193 dst_pitch *= cpp;
194
195 switch (cpp) {
196 case 1:
197 case 2:
198 case 3:
199 BR13 = (0xF0 << 16) | (1 << 24);
200 CMD = XY_COLOR_BLT_CMD;
201 break;
202 case 4:
203 BR13 = (0xF0 << 16) | (1 << 24) | (1 << 25);
204 CMD = XY_COLOR_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
205 break;
206 default:
207 return;
208 }
209 #ifndef I915
210 if (dst_tiled) {
211 CMD |= XY_DST_TILED;
212 dst_pitch /= 4;
213 }
214 #endif
215
216 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
217 __FUNCTION__, dst_buffer, dst_pitch, dst_offset, x, y, w, h);
218
219 assert(w > 0);
220 assert(h > 0);
221
222 BEGIN_BATCH(6, NO_LOOP_CLIPRECTS);
223 OUT_BATCH(CMD);
224 OUT_BATCH(BR13 | dst_pitch);
225 OUT_BATCH((y << 16) | x);
226 OUT_BATCH(((y + h) << 16) | (x + w));
227 OUT_RELOC(dst_buffer,
228 DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER,
229 dst_offset);
230 OUT_BATCH(color);
231 ADVANCE_BATCH();
232 }
233
234 static GLuint translate_raster_op(GLenum logicop)
235 {
236 switch(logicop) {
237 case GL_CLEAR: return 0x00;
238 case GL_AND: return 0x88;
239 case GL_AND_REVERSE: return 0x44;
240 case GL_COPY: return 0xCC;
241 case GL_AND_INVERTED: return 0x22;
242 case GL_NOOP: return 0xAA;
243 case GL_XOR: return 0x66;
244 case GL_OR: return 0xEE;
245 case GL_NOR: return 0x11;
246 case GL_EQUIV: return 0x99;
247 case GL_INVERT: return 0x55;
248 case GL_OR_REVERSE: return 0xDD;
249 case GL_COPY_INVERTED: return 0x33;
250 case GL_OR_INVERTED: return 0xBB;
251 case GL_NAND: return 0x77;
252 case GL_SET: return 0xFF;
253 default: return 0;
254 }
255 }
256
257
258 /* Copy BitBlt
259 */
260 void
261 intelEmitCopyBlit(struct intel_context *intel,
262 GLuint cpp,
263 GLshort src_pitch,
264 dri_bo *src_buffer,
265 GLuint src_offset,
266 GLboolean src_tiled,
267 GLshort dst_pitch,
268 dri_bo *dst_buffer,
269 GLuint dst_offset,
270 GLboolean dst_tiled,
271 GLshort src_x, GLshort src_y,
272 GLshort dst_x, GLshort dst_y,
273 GLshort w, GLshort h,
274 GLenum logic_op)
275 {
276 GLuint CMD, BR13;
277 int dst_y2 = dst_y + h;
278 int dst_x2 = dst_x + w;
279 int ret;
280 BATCH_LOCALS;
281
282 again:
283 ret = dri_bufmgr_check_aperture_space(dst_buffer);
284 ret |= dri_bufmgr_check_aperture_space(src_buffer);
285 if (ret) {
286 intel_batchbuffer_flush(intel->batch);
287 goto again;
288 }
289
290 DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
291 __FUNCTION__,
292 src_buffer, src_pitch, src_offset, src_x, src_y,
293 dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h);
294
295 src_pitch *= cpp;
296 dst_pitch *= cpp;
297
298 BR13 = translate_raster_op(logic_op) << 16;
299
300 switch (cpp) {
301 case 1:
302 case 2:
303 case 3:
304 BR13 |= (1 << 24);
305 CMD = XY_SRC_COPY_BLT_CMD;
306 break;
307 case 4:
308 BR13 |= (1 << 24) | (1 << 25);
309 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
310 break;
311 default:
312 return;
313 }
314
315 #ifndef I915
316 if (dst_tiled) {
317 CMD |= XY_DST_TILED;
318 dst_pitch /= 4;
319 }
320 if (src_tiled) {
321 CMD |= XY_SRC_TILED;
322 src_pitch /= 4;
323 }
324 #endif
325
326 if (dst_y2 <= dst_y || dst_x2 <= dst_x) {
327 return;
328 }
329
330 dst_pitch &= 0xffff;
331 src_pitch &= 0xffff;
332
333 /* Initial y values don't seem to work with negative pitches. If
334 * we adjust the offsets manually (below), it seems to work fine.
335 *
336 * On the other hand, if we always adjust, the hardware doesn't
337 * know which blit directions to use, so overlapping copypixels get
338 * the wrong result.
339 */
340 if (dst_pitch > 0 && src_pitch > 0) {
341 assert(dst_x < dst_x2);
342 assert(dst_y < dst_y2);
343
344 BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
345 OUT_BATCH(CMD);
346 OUT_BATCH(BR13 | dst_pitch);
347 OUT_BATCH((dst_y << 16) | dst_x);
348 OUT_BATCH((dst_y2 << 16) | dst_x2);
349 OUT_RELOC(dst_buffer,
350 DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER,
351 dst_offset);
352 OUT_BATCH((src_y << 16) | src_x);
353 OUT_BATCH(src_pitch);
354 OUT_RELOC(src_buffer,
355 DRM_GEM_DOMAIN_I915_RENDER, 0,
356 src_offset);
357 ADVANCE_BATCH();
358 }
359 else {
360 assert(dst_x < dst_x2);
361 assert(h > 0);
362
363 BEGIN_BATCH(8, NO_LOOP_CLIPRECTS);
364 OUT_BATCH(CMD);
365 OUT_BATCH(BR13 | dst_pitch);
366 OUT_BATCH((0 << 16) | dst_x);
367 OUT_BATCH((h << 16) | dst_x2);
368 OUT_RELOC(dst_buffer,
369 DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER,
370 dst_offset + dst_y * dst_pitch);
371 OUT_BATCH((0 << 16) | src_x);
372 OUT_BATCH(src_pitch);
373 OUT_RELOC(src_buffer,
374 DRM_GEM_DOMAIN_I915_RENDER, 0,
375 src_offset + src_y * src_pitch);
376 ADVANCE_BATCH();
377 }
378 BEGIN_BATCH(1, NO_LOOP_CLIPRECTS);
379 OUT_BATCH(MI_FLUSH);
380 ADVANCE_BATCH();
381 intel_batchbuffer_flush(intel->batch);
382 }
383
384
385 /**
386 * Use blitting to clear the renderbuffers named by 'flags'.
387 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field
388 * since that might include software renderbuffers or renderbuffers
389 * which we're clearing with triangles.
390 * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear
391 */
392 void
393 intelClearWithBlit(GLcontext *ctx, GLbitfield mask)
394 {
395 struct intel_context *intel = intel_context(ctx);
396 struct gl_framebuffer *fb = ctx->DrawBuffer;
397 GLuint clear_depth;
398 GLbitfield skipBuffers = 0;
399 BATCH_LOCALS;
400
401 /*
402 * Compute values for clearing the buffers.
403 */
404 clear_depth = 0;
405 if (mask & BUFFER_BIT_DEPTH) {
406 clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear);
407 }
408 if (mask & BUFFER_BIT_STENCIL) {
409 clear_depth |= (ctx->Stencil.Clear & 0xff) << 24;
410 }
411
412 /* If clearing both depth and stencil, skip BUFFER_BIT_STENCIL in
413 * the loop below.
414 */
415 if ((mask & BUFFER_BIT_DEPTH) && (mask & BUFFER_BIT_STENCIL)) {
416 skipBuffers = BUFFER_BIT_STENCIL;
417 }
418
419 /* XXX Move this flush/lock into the following conditional? */
420 intelFlush(&intel->ctx);
421 LOCK_HARDWARE(intel);
422
423 if (intel->numClipRects) {
424 GLint cx, cy, cw, ch;
425 drm_clip_rect_t clear;
426 int i;
427
428 /* Get clear bounds after locking */
429 cx = fb->_Xmin;
430 cy = fb->_Ymin;
431 cw = fb->_Xmax - cx;
432 ch = fb->_Ymax - cy;
433
434 if (fb->Name == 0) {
435 /* clearing a window */
436
437 /* flip top to bottom */
438 clear.x1 = cx + intel->drawX;
439 clear.y1 = intel->driDrawable->y + intel->driDrawable->h - cy - ch;
440 clear.x2 = clear.x1 + cw;
441 clear.y2 = clear.y1 + ch;
442 }
443 else {
444 /* clearing FBO */
445 assert(intel->numClipRects == 1);
446 assert(intel->pClipRects == &intel->fboRect);
447 clear.x1 = cx;
448 clear.y1 = cy;
449 clear.x2 = clear.x1 + cw;
450 clear.y2 = clear.y1 + ch;
451 /* no change to mask */
452 }
453
454 for (i = 0; i < intel->numClipRects; i++) {
455 const drm_clip_rect_t *box = &intel->pClipRects[i];
456 drm_clip_rect_t b;
457 GLuint buf;
458 GLuint clearMask = mask; /* use copy, since we modify it below */
459 GLboolean all = (cw == fb->Width && ch == fb->Height);
460
461 if (!all) {
462 intel_intersect_cliprects(&b, &clear, box);
463 }
464 else {
465 b = *box;
466 }
467
468 if (b.x1 >= b.x2 || b.y1 >= b.y2)
469 continue;
470
471 if (0)
472 _mesa_printf("clear %d,%d..%d,%d, mask %x\n",
473 b.x1, b.y1, b.x2, b.y2, mask);
474
475 /* Loop over all renderbuffers */
476 for (buf = 0; buf < BUFFER_COUNT && clearMask; buf++) {
477 const GLbitfield bufBit = 1 << buf;
478 if ((clearMask & bufBit) && !(bufBit & skipBuffers)) {
479 /* OK, clear this renderbuffer */
480 struct intel_region *irb_region =
481 intel_get_rb_region(fb, buf);
482 dri_bo *write_buffer =
483 intel_region_buffer(intel, irb_region,
484 all ? INTEL_WRITE_FULL :
485 INTEL_WRITE_PART);
486
487 GLuint clearVal;
488 GLint pitch, cpp;
489 GLuint BR13, CMD;
490
491 ASSERT(irb_region);
492
493 pitch = irb_region->pitch;
494 cpp = irb_region->cpp;
495
496 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n",
497 __FUNCTION__,
498 irb_region->buffer, (pitch * cpp),
499 irb_region->draw_offset,
500 b.x1, b.y1, b.x2 - b.x1, b.y2 - b.y1);
501
502 BR13 = 0xf0 << 16;
503 CMD = XY_COLOR_BLT_CMD;
504
505 /* Setup the blit command */
506 if (cpp == 4) {
507 BR13 |= (1 << 24) | (1 << 25);
508 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
509 if (clearMask & BUFFER_BIT_DEPTH)
510 CMD |= XY_BLT_WRITE_RGB;
511 if (clearMask & BUFFER_BIT_STENCIL)
512 CMD |= XY_BLT_WRITE_ALPHA;
513 }
514 else {
515 /* clearing RGBA */
516 CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
517 }
518 }
519 else {
520 ASSERT(cpp == 2 || cpp == 0);
521 BR13 |= (1 << 24);
522 }
523
524 #ifndef I915
525 if (irb_region->tiled) {
526 CMD |= XY_DST_TILED;
527 pitch /= 4;
528 }
529 #endif
530 BR13 |= (pitch * cpp);
531
532 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) {
533 clearVal = clear_depth;
534 }
535 else {
536 clearVal = (cpp == 4)
537 ? intel->ClearColor8888 : intel->ClearColor565;
538 }
539 /*
540 _mesa_debug(ctx, "hardware blit clear buf %d rb id %d\n",
541 buf, irb->Base.Name);
542 */
543 intel_wait_flips(intel);
544
545 assert(b.x1 < b.x2);
546 assert(b.y1 < b.y2);
547
548 BEGIN_BATCH(6, REFERENCES_CLIPRECTS);
549 OUT_BATCH(CMD);
550 OUT_BATCH(BR13);
551 OUT_BATCH((b.y1 << 16) | b.x1);
552 OUT_BATCH((b.y2 << 16) | b.x2);
553 OUT_RELOC(write_buffer,
554 DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER,
555 irb_region->draw_offset);
556 OUT_BATCH(clearVal);
557 ADVANCE_BATCH();
558 clearMask &= ~bufBit; /* turn off bit, for faster loop exit */
559 }
560 }
561 }
562 intel_batchbuffer_flush(intel->batch);
563 }
564
565 UNLOCK_HARDWARE(intel);
566 }
567
568 void
569 intelEmitImmediateColorExpandBlit(struct intel_context *intel,
570 GLuint cpp,
571 GLubyte *src_bits, GLuint src_size,
572 GLuint fg_color,
573 GLshort dst_pitch,
574 dri_bo *dst_buffer,
575 GLuint dst_offset,
576 GLboolean dst_tiled,
577 GLshort x, GLshort y,
578 GLshort w, GLshort h,
579 GLenum logic_op)
580 {
581 int dwords = ALIGN(src_size, 8) / 4;
582 uint32_t opcode, br13, blit_cmd;
583
584 assert( logic_op - GL_CLEAR >= 0 );
585 assert( logic_op - GL_CLEAR < 0x10 );
586
587 if (w < 0 || h < 0)
588 return;
589
590 dst_pitch *= cpp;
591
592 if (dst_tiled)
593 dst_pitch /= 4;
594
595 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n",
596 __FUNCTION__,
597 dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords);
598
599 intel_batchbuffer_require_space( intel->batch,
600 (8 * 4) +
601 (3 * 4) +
602 dwords,
603 NO_LOOP_CLIPRECTS );
604
605 opcode = XY_SETUP_BLT_CMD;
606 if (cpp == 4)
607 opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB;
608 if (dst_tiled)
609 opcode |= XY_DST_TILED;
610
611 br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29);
612 if (cpp == 2)
613 br13 |= BR13_565;
614 else
615 br13 |= BR13_8888;
616
617 blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */
618 if (dst_tiled)
619 blit_cmd |= XY_DST_TILED;
620
621 BEGIN_BATCH(8 + 3, NO_LOOP_CLIPRECTS);
622 OUT_BATCH(opcode);
623 OUT_BATCH(br13);
624 OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */
625 OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */
626 OUT_RELOC(dst_buffer,
627 DRM_GEM_DOMAIN_I915_RENDER, DRM_GEM_DOMAIN_I915_RENDER,
628 dst_offset);
629 OUT_BATCH(0); /* bg */
630 OUT_BATCH(fg_color); /* fg */
631 OUT_BATCH(0); /* pattern base addr */
632
633 OUT_BATCH(blit_cmd | ((3 - 2) + dwords));
634 OUT_BATCH((y << 16) | x);
635 OUT_BATCH(((y + h) << 16) | (x + w));
636 ADVANCE_BATCH();
637
638 intel_batchbuffer_data( intel->batch,
639 src_bits,
640 dwords * 4,
641 NO_LOOP_CLIPRECTS );
642 }