1 /**************************************************************************
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 **************************************************************************/
37 * Kevin E. Martin <martin@valinux.com>
38 * Gareth Hughes <gareth@valinux.com>
39 * Keith Whitwell <keith@tungstengraphics.com>
43 #include "main/glheader.h"
44 #include "swrast/swrast.h"
46 #include "radeon_common.h"
47 #include "radeon_lock.h"
48 #include "radeon_span.h"
/* Forward declaration: installs the per-format Get/Put span callbacks on a
 * renderbuffer (definition at the end of this file). */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
/* radeon tiling on r300-r500 has 4 states:
     macro-linear/micro-linear
     macro-linear/micro-tiled
     macro-tiled /micro-linear
     macro-tiled /micro-tiled
   For 2-byte surfaces we only provide 8x2 microtiling. */
65 static GLubyte
*radeon_ptr_4byte(const struct radeon_renderbuffer
* rrb
,
68 GLubyte
*ptr
= rrb
->bo
->ptr
;
69 uint32_t mask
= RADEON_BO_FLAGS_MACRO_TILE
| RADEON_BO_FLAGS_MICRO_TILE
;
72 if (rrb
->has_surface
|| !(rrb
->bo
->flags
& mask
)) {
73 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
76 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MACRO_TILE
) {
77 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MICRO_TILE
) {
78 offset
= ((y
>> 4) * (rrb
->pitch
>> 7) + (x
>> 5)) << 11;
79 offset
+= (((y
>> 3) ^ (x
>> 5)) & 0x1) << 10;
80 offset
+= (((y
>> 4) ^ (x
>> 4)) & 0x1) << 9;
81 offset
+= (((y
>> 2) ^ (x
>> 4)) & 0x1) << 8;
82 offset
+= (((y
>> 3) ^ (x
>> 3)) & 0x1) << 7;
83 offset
+= ((y
>> 1) & 0x1) << 6;
84 offset
+= ((x
>> 2) & 0x1) << 5;
85 offset
+= (y
& 1) << 4;
86 offset
+= (x
& 3) << 2;
88 offset
= ((y
>> 3) * (rrb
->pitch
>> 8) + (x
>> 6)) << 11;
89 offset
+= (((y
>> 2) ^ (x
>> 6)) & 0x1) << 10;
90 offset
+= (((y
>> 3) ^ (x
>> 5)) & 0x1) << 9;
91 offset
+= (((y
>> 1) ^ (x
>> 5)) & 0x1) << 8;
92 offset
+= (((y
>> 2) ^ (x
>> 4)) & 0x1) << 7;
93 offset
+= (y
& 1) << 6;
94 offset
+= (x
& 15) << 2;
97 offset
= ((y
>> 1) * (rrb
->pitch
>> 4) + (x
>> 2)) << 5;
98 offset
+= (y
& 1) << 4;
99 offset
+= (x
& 3) << 2;
105 static GLubyte
*radeon_ptr_2byte_8x2(const struct radeon_renderbuffer
* rrb
,
108 GLubyte
*ptr
= rrb
->bo
->ptr
;
109 uint32_t mask
= RADEON_BO_FLAGS_MACRO_TILE
| RADEON_BO_FLAGS_MICRO_TILE
;
112 if (rrb
->has_surface
|| !(rrb
->bo
->flags
& mask
)) {
113 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
116 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MACRO_TILE
) {
117 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MICRO_TILE
) {
118 offset
= ((y
>> 4) * (rrb
->pitch
>> 7) + (x
>> 6)) << 11;
119 offset
+= (((y
>> 3) ^ (x
>> 6)) & 0x1) << 10;
120 offset
+= (((y
>> 4) ^ (x
>> 5)) & 0x1) << 9;
121 offset
+= (((y
>> 2) ^ (x
>> 5)) & 0x1) << 8;
122 offset
+= (((y
>> 3) ^ (x
>> 4)) & 0x1) << 7;
123 offset
+= ((y
>> 1) & 0x1) << 6;
124 offset
+= ((x
>> 3) & 0x1) << 5;
125 offset
+= (y
& 1) << 4;
126 offset
+= (x
& 3) << 2;
128 offset
= ((y
>> 3) * (rrb
->pitch
>> 8) + (x
>> 7)) << 11;
129 offset
+= (((y
>> 2) ^ (x
>> 7)) & 0x1) << 10;
130 offset
+= (((y
>> 3) ^ (x
>> 6)) & 0x1) << 9;
131 offset
+= (((y
>> 1) ^ (x
>> 6)) & 0x1) << 8;
132 offset
+= (((y
>> 2) ^ (x
>> 5)) & 0x1) << 7;
133 offset
+= (y
& 1) << 6;
134 offset
+= ((x
>> 4) & 0x1) << 5;
135 offset
+= (x
& 15) << 2;
138 offset
= ((y
>> 1) * (rrb
->pitch
>> 4) + (x
>> 3)) << 5;
139 offset
+= (y
& 0x1) << 4;
140 offset
+= (x
& 0x7) << 1;
/**
 * Convert a packed depth/stencil word from Z24S8 byte order (stencil in
 * the low byte) to S8Z24 order (stencil in the high byte) by rotating the
 * 32-bit value right by 8 bits.
 * The source span was missing the function's return-type line and braces;
 * reconstructed here.
 */
static inline uint32_t z24s8_to_s8z24(uint32_t val)
{
	return (val << 24) | (val >> 8);
}
/**
 * Convert a packed depth/stencil word from S8Z24 byte order (stencil in
 * the high byte) back to Z24S8 order (stencil in the low byte) by rotating
 * the 32-bit value left by 8 bits — the exact inverse of z24s8_to_s8z24().
 * The source span was missing the function's return-type line and braces;
 * reconstructed here.
 */
static inline uint32_t s8z24_to_z24s8(uint32_t val)
{
	return (val >> 24) | (val << 8);
}
161 * Note that all information needed to access pixels in a renderbuffer
162 * should be obtained through the gl_renderbuffer parameter, not per-context
166 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
167 struct radeon_renderbuffer *rrb = (void *) rb; \
168 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
169 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
170 unsigned int num_cliprects; \
171 struct drm_clip_rect *cliprects; \
175 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
177 #define LOCAL_DEPTH_VARS \
178 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
179 struct radeon_renderbuffer *rrb = (void *) rb; \
180 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
181 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
182 unsigned int num_cliprects; \
183 struct drm_clip_rect *cliprects; \
185 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
187 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
189 #define Y_FLIP(_y) ((_y) * yScale + yBias)
195 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
196 * the cliprect info from the context, not the driDrawable.
197 * Move this into spantmp2.h someday.
199 #define HW_CLIPLOOP() \
201 int _nc = num_cliprects; \
203 int minx = cliprects[_nc].x1 - x_off; \
204 int miny = cliprects[_nc].y1 - y_off; \
205 int maxx = cliprects[_nc].x2 - x_off; \
206 int maxy = cliprects[_nc].y2 - y_off;
208 /* ================================================================
212 /* 16 bit, RGB565 color spanline and pixel functions
214 #define SPANTMP_PIXEL_FMT GL_RGB
215 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
217 #define TAG(x) radeon##x##_RGB565
218 #define TAG2(x,y) radeon##x##_RGB565##y
219 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
220 #include "spantmp2.h"
222 /* 16 bit, ARGB1555 color spanline and pixel functions
224 #define SPANTMP_PIXEL_FMT GL_BGRA
225 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
227 #define TAG(x) radeon##x##_ARGB1555
228 #define TAG2(x,y) radeon##x##_ARGB1555##y
229 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
230 #include "spantmp2.h"
232 /* 16 bit, RGBA4 color spanline and pixel functions
234 #define SPANTMP_PIXEL_FMT GL_BGRA
235 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
237 #define TAG(x) radeon##x##_ARGB4444
238 #define TAG2(x,y) radeon##x##_ARGB4444##y
239 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
240 #include "spantmp2.h"
242 /* 32 bit, xRGB8888 color spanline and pixel functions
244 #define SPANTMP_PIXEL_FMT GL_BGRA
245 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
247 #define TAG(x) radeon##x##_xRGB8888
248 #define TAG2(x,y) radeon##x##_xRGB8888##y
249 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
250 #define PUT_VALUE(_x, _y, d) { \
251 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
254 #include "spantmp2.h"
256 /* 32 bit, ARGB8888 color spanline and pixel functions
258 #define SPANTMP_PIXEL_FMT GL_BGRA
259 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
261 #define TAG(x) radeon##x##_ARGB8888
262 #define TAG2(x,y) radeon##x##_ARGB8888##y
263 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
264 #define PUT_VALUE(_x, _y, d) { \
265 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
268 #include "spantmp2.h"
270 /* ================================================================
274 /* The Radeon family has depth tiling on all the time, so we have to convert
275 * the x,y coordinates into the memory bus address (mba) in the same
276 * manner as the engine. In each case, the linear block address (ba)
277 * is calculated, and then wired with x and y to produce the final
279 * The chip will do address translation on its own if the surface registers
280 * are set up correctly. It is not quite enough to get it working with hyperz
284 /* 16-bit depth buffer functions
286 #define VALUE_TYPE GLushort
288 #define WRITE_DEPTH( _x, _y, d ) \
289 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
291 #define READ_DEPTH( d, _x, _y ) \
292 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
294 #define TAG(x) radeon##x##_z16
295 #include "depthtmp.h"
299 * Careful: It looks like the R300 uses ZZZS byte order while the R200
300 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
302 #define VALUE_TYPE GLuint
305 #define WRITE_DEPTH( _x, _y, d ) \
307 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
308 GLuint tmp = *_ptr; \
310 tmp |= ((d << 8) & 0xffffff00); \
314 #define WRITE_DEPTH( _x, _y, d ) \
316 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
317 GLuint tmp = *_ptr; \
319 tmp |= ((d) & 0x00ffffff); \
325 #define READ_DEPTH( d, _x, _y ) \
327 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
330 #define READ_DEPTH( d, _x, _y ) \
331 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff;
334 fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
335 d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
337 #define TAG(x) radeon##x##_z24
338 #include "depthtmp.h"
340 /* 24 bit depth, 8 bit stencil depthbuffer functions
343 * Careful: It looks like the R300 uses ZZZS byte order while the R200
344 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
346 #define VALUE_TYPE GLuint
349 #define WRITE_DEPTH( _x, _y, d ) \
351 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
355 #define WRITE_DEPTH( _x, _y, d ) \
357 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
358 GLuint tmp = z24s8_to_s8z24(d); \
364 #define READ_DEPTH( d, _x, _y ) \
366 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
369 #define READ_DEPTH( d, _x, _y ) do { \
370 d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \
374 fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
375 d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
377 #define TAG(x) radeon##x##_z24_s8
378 #include "depthtmp.h"
380 /* ================================================================
384 /* 24 bit depth, 8 bit stencil depthbuffer functions
387 #define WRITE_STENCIL( _x, _y, d ) \
389 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
390 GLuint tmp = *_ptr; \
396 #define WRITE_STENCIL( _x, _y, d ) \
398 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
399 GLuint tmp = *_ptr; \
401 tmp |= (((d) & 0xff) << 24); \
407 #define READ_STENCIL( d, _x, _y ) \
409 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
410 GLuint tmp = *_ptr; \
411 d = tmp & 0x000000ff; \
414 #define READ_STENCIL( d, _x, _y ) \
416 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
417 GLuint tmp = *_ptr; \
418 d = (tmp & 0xff000000) >> 24; \
422 #define TAG(x) radeon##x##_z24_s8
423 #include "stenciltmp.h"
426 static void map_unmap_rb(struct gl_renderbuffer
*rb
, int flag
)
428 struct radeon_renderbuffer
*rrb
= radeon_renderbuffer(rb
);
431 if (rrb
== NULL
|| !rrb
->bo
)
435 if (rrb
->bo
->bom
->funcs
->bo_wait
)
436 radeon_bo_wait(rrb
->bo
);
437 r
= radeon_bo_map(rrb
->bo
, 1);
439 fprintf(stderr
, "(%s) error(%d) mapping buffer.\n",
443 radeonSetSpanFunctions(rrb
);
445 radeon_bo_unmap(rrb
->bo
);
452 radeon_map_unmap_buffers(GLcontext
*ctx
, GLboolean map
)
456 /* color draw buffers */
457 for (j
= 0; j
< ctx
->DrawBuffer
->_NumColorDrawBuffers
; j
++)
458 map_unmap_rb(ctx
->DrawBuffer
->_ColorDrawBuffers
[j
], map
);
460 /* check for render to textures */
461 for (i
= 0; i
< BUFFER_COUNT
; i
++) {
462 struct gl_renderbuffer_attachment
*att
=
463 ctx
->DrawBuffer
->Attachment
+ i
;
464 struct gl_texture_object
*tex
= att
->Texture
;
466 /* Render to texture. Note that a mipmapped texture need not
467 * be complete for render to texture, so we must restrict to
468 * mapping only the attached image.
470 radeon_texture_image
*image
= get_radeon_texture_image(tex
->Image
[att
->CubeMapFace
][att
->TextureLevel
]);
471 ASSERT(att
->Renderbuffer
);
474 radeon_teximage_map(image
, GL_TRUE
);
476 radeon_teximage_unmap(image
);
480 map_unmap_rb(ctx
->ReadBuffer
->_ColorReadBuffer
, map
);
482 /* depth buffer (Note wrapper!) */
483 if (ctx
->DrawBuffer
->_DepthBuffer
)
484 map_unmap_rb(ctx
->DrawBuffer
->_DepthBuffer
->Wrapped
, map
);
486 if (ctx
->DrawBuffer
->_StencilBuffer
)
487 map_unmap_rb(ctx
->DrawBuffer
->_StencilBuffer
->Wrapped
, map
);
490 static void radeonSpanRenderStart(GLcontext
* ctx
)
492 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
495 radeon_firevertices(rmesa
);
497 /* The locking and wait for idle should really only be needed in classic mode.
498 * In a future memory manager based implementation, this should become
499 * unnecessary due to the fact that mapping our buffers, textures, etc.
500 * should implicitly wait for any previous rendering commands that must
502 if (!rmesa
->radeonScreen
->driScreen
->dri2
.enabled
) {
503 LOCK_HARDWARE(rmesa
);
504 radeonWaitForIdleLocked(rmesa
);
507 for (i
= 0; i
< ctx
->Const
.MaxTextureImageUnits
; i
++) {
508 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
)
509 ctx
->Driver
.MapTexture(ctx
, ctx
->Texture
.Unit
[i
]._Current
);
512 radeon_map_unmap_buffers(ctx
, 1);
515 static void radeonSpanRenderFinish(GLcontext
* ctx
)
517 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
520 if (!rmesa
->radeonScreen
->driScreen
->dri2
.enabled
) {
521 UNLOCK_HARDWARE(rmesa
);
523 for (i
= 0; i
< ctx
->Const
.MaxTextureImageUnits
; i
++) {
524 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
)
525 ctx
->Driver
.UnmapTexture(ctx
, ctx
->Texture
.Unit
[i
]._Current
);
528 radeon_map_unmap_buffers(ctx
, 0);
531 void radeonInitSpanFuncs(GLcontext
* ctx
)
533 struct swrast_device_driver
*swdd
=
534 _swrast_GetDeviceDriverReference(ctx
);
535 swdd
->SpanRenderStart
= radeonSpanRenderStart
;
536 swdd
->SpanRenderFinish
= radeonSpanRenderFinish
;
540 * Plug in the Get/Put routines for the given driRenderbuffer.
542 static void radeonSetSpanFunctions(struct radeon_renderbuffer
*rrb
)
544 if (rrb
->base
._ActualFormat
== GL_RGB5
) {
545 radeonInitPointers_RGB565(&rrb
->base
);
546 } else if (rrb
->base
._ActualFormat
== GL_RGB8
) {
547 radeonInitPointers_xRGB8888(&rrb
->base
);
548 } else if (rrb
->base
._ActualFormat
== GL_RGBA8
) {
549 radeonInitPointers_ARGB8888(&rrb
->base
);
550 } else if (rrb
->base
._ActualFormat
== GL_RGBA4
) {
551 radeonInitPointers_ARGB4444(&rrb
->base
);
552 } else if (rrb
->base
._ActualFormat
== GL_RGB5_A1
) {
553 radeonInitPointers_ARGB1555(&rrb
->base
);
554 } else if (rrb
->base
._ActualFormat
== GL_DEPTH_COMPONENT16
) {
555 radeonInitDepthPointers_z16(&rrb
->base
);
556 } else if (rrb
->base
._ActualFormat
== GL_DEPTH_COMPONENT24
) {
557 radeonInitDepthPointers_z24(&rrb
->base
);
558 } else if (rrb
->base
._ActualFormat
== GL_DEPTH24_STENCIL8_EXT
) {
559 radeonInitDepthPointers_z24_s8(&rrb
->base
);
560 } else if (rrb
->base
._ActualFormat
== GL_STENCIL_INDEX8_EXT
) {
561 radeonInitStencilPointers_z24_s8(&rrb
->base
);
563 fprintf(stderr
, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb
->base
._ActualFormat
);