radeon: Wait for BO idle if necessary before mapping it.
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
1 /**************************************************************************
2
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
6
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
10
11 All Rights Reserved.
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
20
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
24
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 **************************************************************************/
34
35 /*
36 * Authors:
37 * Kevin E. Martin <martin@valinux.com>
38 * Gareth Hughes <gareth@valinux.com>
39 * Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43 #include "main/glheader.h"
44 #include "swrast/swrast.h"
45
46 #include "radeon_common.h"
47 #include "radeon_lock.h"
48 #include "radeon_span.h"
49
#define DBG 0

/* Forward declaration: defined at the bottom of this file, needed by
 * map_unmap_rb() to (re)install span functions after mapping a BO. */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
53
/**
 * Return a CPU pointer to the 32bpp pixel at (x, y) in the renderbuffer.
 *
 * If the BO has a hardware surface set up (has_surface) or carries no tiling
 * flags, the layout is plain linear: offset = x * cpp + y * pitch.  Otherwise
 * the macro/micro tile address swizzle is reproduced in software so span
 * rendering sees the same memory layout the GPU uses.  The shift constants
 * below are specific to 4-byte pixels (<< 11 steps are 2KB macro tiles,
 * << 5 steps are 32-byte micro rows).
 */
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		/* linear layout */
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				/* macro + micro tiled */
				nmacroblkpl = rrb->pitch >> 5;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 5) << 11;
				offset += ((x & 31) >> 2) << 5;
				offset += (x & 3) << 2;
			} else {
				/* macro tiled only */
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 2;
			}
		} else {
			/* micro tiled only; pitch rounded up to 32 bytes */
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 3) << 5;
			offset += (x & 7) << 2;
		}
	}
	return &ptr[offset];
}
93
/**
 * Return a CPU pointer to the 16bpp pixel at (x, y) in the renderbuffer.
 *
 * Linear layout when a hardware surface is set up or the BO is untiled;
 * otherwise the macro/micro tile swizzle, with block widths sized for
 * 2-byte pixels, is applied (cf. radeon_ptr32 for 4-byte pixels).
 */
static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		/* linear layout */
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				/* macro + micro tiled */
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 1;
			} else {
				/* macro tiled only.
				 * NOTE(review): "<< 2" below scales x by 4
				 * bytes although this is the 2-byte-per-pixel
				 * helper (the branch above uses "<< 1") —
				 * preserved as-is, but worth confirming. */
				nmacroblkpl = rrb->pitch >> 7;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 7) << 11;
				offset += ((x & 127) >> 4) << 5;
				offset += (x & 15) << 2;
			}
		} else {
			/* micro tiled only; pitch rounded up to 32 bytes */
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 4) << 5;
			offset += (x & 15) << 2;
		}
	}
	return &ptr[offset];
}
133
/**
 * Return a CPU pointer to the pixel at (x, y) for an arbitrary cpp,
 * deriving micro/macro tile widths (in pixels) from rrb->cpp.
 * Format independent but slower than the fixed-size variants above;
 * used for the 16-bit depth buffer path.
 */
static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
			   GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint microblkxs;
	GLint macroblkxs;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		/* linear layout */
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				/* macro + micro tiled: 16-byte micro rows,
				 * 128-byte macro rows, expressed in pixels */
				microblkxs = 16 / rrb->cpp;
				macroblkxs = 128 / rrb->cpp;
				nmacroblkpl = rrb->pitch / macroblkxs;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x / macroblkxs) << 11;
				offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
				offset += (x & (microblkxs - 1)) * rrb->cpp;
			} else {
				/* macro tiled only: 32-byte micro rows,
				 * 256-byte macro rows */
				microblkxs = 32 / rrb->cpp;
				macroblkxs = 256 / rrb->cpp;
				nmacroblkpl = rrb->pitch / macroblkxs;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x / macroblkxs) << 11;
				offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
				offset += (x & (microblkxs - 1)) * rrb->cpp;
			}
		} else {
			/* micro tiled only; pitch rounded up to 32 bytes */
			microblkxs = 32 / rrb->cpp;
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x / microblkxs) << 5;
			offset += (x & (microblkxs - 1)) * rrb->cpp;
		}
	}
	return &ptr[offset];
}
180
#ifndef COMPILE_R300
/* Depth/stencil packing differs between chip families: r300 keeps the
 * stencil byte in the low 8 bits (z24s8), older chips keep it in the
 * high byte (s8z24).  These helpers convert between the two 32-bit
 * packings; each is the inverse of the other. */
static uint32_t
z24s8_to_s8z24(uint32_t val)
{
	/* stencil: bits 7..0 -> 31..24; depth: bits 31..8 -> 23..0 */
	uint32_t stencil = val & 0xff;
	uint32_t depth = val >> 8;

	return (stencil << 24) | depth;
}

static uint32_t
s8z24_to_z24s8(uint32_t val)
{
	/* stencil: bits 31..24 -> 7..0; depth: bits 23..0 -> 31..8 */
	uint32_t stencil = val >> 24;
	uint32_t depth = val & 0x00ffffff;

	return (depth << 8) | stencil;
}
#endif
194
/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
/* Locals expected by the spantmp2.h/depthtmp.h/stenciltmp.h templates:
 * resolve the radeon context and renderbuffer, compute the y-flip (window
 * system buffers — DrawBuffer->Name == 0 — are rendered upside down,
 * user FBOs are not), and fetch the current cliprect list and offsets. */
#define LOCAL_VARS							\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;			\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
   GLuint p;								\
   (void)p;								\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Same as LOCAL_VARS, minus the pixel temporary 'p' the color templates use. */
#define LOCAL_DEPTH_VARS						\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;			\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Flip y for window-system buffers (yScale == -1), identity for FBOs. */
#define Y_FLIP(_y) ((_y) * yScale + yBias)
225
/* Hardware locking is done once per span-rendering session in
 * radeonSpanRenderStart/Finish, so the per-span hooks are empty. */
#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 *
 * Note the braces opened here are intentionally left unbalanced; the
 * template's HW_ENDCLIPLOOP() closes them. */
#define HW_CLIPLOOP()						\
   do {								\
      int _nc = num_cliprects;					\
      while ( _nc-- ) {						\
	 int minx = cliprects[_nc].x1 - x_off;			\
	 int miny = cliprects[_nc].y1 - y_off;			\
	 int maxx = cliprects[_nc].x2 - x_off;			\
	 int maxy = cliprects[_nc].y2 - y_off;
242
/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 * (spantmp2.h instantiates the radeon*_RGB565 accessors from these). */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"
256
257 /* 32 bit, xRGB8888 color spanline and pixel functions
258 */
259 #define SPANTMP_PIXEL_FMT GL_BGRA
260 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
261
262 #define TAG(x) radeon##x##_xRGB8888
263 #define TAG2(x,y) radeon##x##_xRGB8888##y
264 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) | 0xff000000))
265 #define PUT_VALUE(_x, _y, d) { \
266 GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
267 *_ptr = d; \
268 } while (0)
269 #include "spantmp2.h"
270
/* 32 bit, ARGB8888 color spanline and pixel functions
 * (spantmp2.h instantiates the radeon*_ARGB8888 accessors from these). */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"
280
/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine. In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly. It is not quite enough to get it working with hyperz
 * too...
 */

/* 16-bit depth buffer functions
 * (depthtmp.h instantiates the radeon*_z16 accessors from these). */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d )					\
   *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d

#define READ_DEPTH( d, _x, _y )						\
   d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"
307
/* 24 bit depth
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
/* r300: depth occupies bits 31..8; the stencil byte (7..0) is preserved. */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   tmp &= 0x000000ff;							\
   tmp |= ((d << 8) & 0xffffff00);					\
   *_ptr = tmp;								\
} while (0)
#else
/* pre-r300: depth occupies bits 23..0; the stencil byte (31..24) is preserved. */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   tmp &= 0xff000000;							\
   tmp |= ((d) & 0x00ffffff);						\
   *_ptr = tmp;								\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y )						\
  do {									\
    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
  }while(0)
#else
#define READ_DEPTH( d, _x, _y )						\
  d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
#endif
/*
    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
    d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24
#include "depthtmp.h"
350
/* 24 bit depth, 8 bit stencil depthbuffer functions
 * EXT_depth_stencil
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
/* r300 stores z24s8 natively, so the packed value is written as-is. */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   *_ptr = d;								\
} while (0)
#else
/* pre-r300 stores s8z24, so convert from the z24s8 the template hands us. */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = z24s8_to_s8z24(d);					\
   *_ptr = tmp;								\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y )						\
  do {									\
    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)));	\
  }while(0)
#else
/* Convert the stored s8z24 back to the z24s8 the template expects. */
#define READ_DEPTH( d, _x, _y ) do {					\
    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off ))); \
  } while (0)
#endif
/*
    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
    d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"
390
/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 * (stenciltmp.h instantiates the radeon*_z24_s8 stencil accessors). */
#ifdef COMPILE_R300
/* r300: stencil lives in the low byte; depth bits (31..8) are preserved. */
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);	\
   GLuint tmp = *_ptr;							\
   tmp &= 0xffffff00;							\
   tmp |= (d) & 0xff;							\
   *_ptr = tmp;								\
} while (0)
#else
/* pre-r300: stencil lives in the high byte; depth bits (23..0) are preserved. */
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);	\
   GLuint tmp = *_ptr;							\
   tmp &= 0x00ffffff;							\
   tmp |= (((d) & 0xff) << 24);						\
   *_ptr = tmp;								\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   d = tmp & 0x000000ff;						\
} while (0)
#else
#define READ_STENCIL( d, _x, _y )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   d = (tmp & 0xff000000) >> 24;					\
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"
435
436
437 static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
438 {
439 struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
440 int r;
441
442 if (rrb == NULL || !rrb->bo)
443 return;
444
445 if (flag) {
446 if (rrb->bo->bom->funcs->bo_wait)
447 radeon_bo_wait(rrb->bo);
448 r = radeon_bo_map(rrb->bo, 1);
449 if (r) {
450 fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
451 __FUNCTION__, r);
452 }
453
454 radeonSetSpanFunctions(rrb);
455 } else {
456 radeon_bo_unmap(rrb->bo);
457 rb->GetRow = NULL;
458 rb->PutRow = NULL;
459 }
460 }
461
462 static void
463 radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
464 {
465 GLuint i, j;
466
467 /* color draw buffers */
468 for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
469 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
470
471 /* check for render to textures */
472 for (i = 0; i < BUFFER_COUNT; i++) {
473 struct gl_renderbuffer_attachment *att =
474 ctx->DrawBuffer->Attachment + i;
475 struct gl_texture_object *tex = att->Texture;
476 if (tex) {
477 /* render to texture */
478 ASSERT(att->Renderbuffer);
479 if (map)
480 ctx->Driver.MapTexture(ctx, tex);
481 else
482 ctx->Driver.UnmapTexture(ctx, tex);
483 }
484 }
485
486 map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
487
488 /* depth buffer (Note wrapper!) */
489 if (ctx->DrawBuffer->_DepthBuffer)
490 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
491
492 if (ctx->DrawBuffer->_StencilBuffer)
493 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
494
495 }
496 static void radeonSpanRenderStart(GLcontext * ctx)
497 {
498 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
499 int i;
500
501 radeon_firevertices(rmesa);
502
503 /* The locking and wait for idle should really only be needed in classic mode.
504 * In a future memory manager based implementation, this should become
505 * unnecessary due to the fact that mapping our buffers, textures, etc.
506 * should implicitly wait for any previous rendering commands that must
507 * be waited on. */
508 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
509 LOCK_HARDWARE(rmesa);
510 radeonWaitForIdleLocked(rmesa);
511 }
512 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
513 if (ctx->Texture.Unit[i]._ReallyEnabled)
514 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
515 }
516
517 radeon_map_unmap_buffers(ctx, 1);
518
519
520
521 }
522
523 static void radeonSpanRenderFinish(GLcontext * ctx)
524 {
525 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
526 int i;
527 _swrast_flush(ctx);
528 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
529 UNLOCK_HARDWARE(rmesa);
530 }
531 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
532 if (ctx->Texture.Unit[i]._ReallyEnabled)
533 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
534 }
535
536 radeon_map_unmap_buffers(ctx, 0);
537 }
538
539 void radeonInitSpanFuncs(GLcontext * ctx)
540 {
541 struct swrast_device_driver *swdd =
542 _swrast_GetDeviceDriverReference(ctx);
543 swdd->SpanRenderStart = radeonSpanRenderStart;
544 swdd->SpanRenderFinish = radeonSpanRenderFinish;
545 }
546
547 /**
548 * Plug in the Get/Put routines for the given driRenderbuffer.
549 */
550 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
551 {
552 if (rrb->base._ActualFormat == GL_RGB5) {
553 radeonInitPointers_RGB565(&rrb->base);
554 } else if (rrb->base._ActualFormat == GL_RGB8) {
555 radeonInitPointers_xRGB8888(&rrb->base);
556 } else if (rrb->base._ActualFormat == GL_RGBA8) {
557 radeonInitPointers_ARGB8888(&rrb->base);
558 } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
559 radeonInitDepthPointers_z16(&rrb->base);
560 } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
561 radeonInitDepthPointers_z24(&rrb->base);
562 } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
563 radeonInitDepthPointers_z24_s8(&rrb->base);
564 } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
565 radeonInitStencilPointers_z24_s8(&rrb->base);
566 }
567 }