1 /**************************************************************************
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 **************************************************************************/
/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 */
43 #include "main/glheader.h"
44 #include "main/texformat.h"
45 #include "swrast/swrast.h"
47 #include "radeon_common.h"
48 #include "radeon_lock.h"
49 #include "radeon_span.h"
/* Forward declaration; the definition at the end of this file selects the
 * radeonInit*Pointers_* routine matching the renderbuffer's format. */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
/* r200 depth buffer is always tiled - this is the formula
   according to the docs, unless I mistyped it.
 */
59 #if defined(RADEON_R200)
60 static GLubyte
*r200_depth_2byte(const struct radeon_renderbuffer
* rrb
,
63 GLubyte
*ptr
= rrb
->bo
->ptr
;
65 if (rrb
->has_surface
) {
66 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
70 b
= (((y
>> 4) * (rrb
->pitch
>> 8) + (x
>> 6)));
71 offset
+= (b
>> 1) << 12;
72 offset
+= (((rrb
->pitch
>> 8) & 0x1) ? (b
& 0x1) : ((b
& 0x1) ^ ((y
>> 4) & 0x1))) << 11;
73 offset
+= ((y
>> 2) & 0x3) << 9;
74 offset
+= ((x
>> 3) & 0x1) << 8;
75 offset
+= ((x
>> 4) & 0x3) << 6;
76 offset
+= ((x
>> 2) & 0x1) << 5;
77 offset
+= ((y
>> 1) & 0x1) << 4;
78 offset
+= ((x
>> 1) & 0x1) << 3;
79 offset
+= (y
& 0x1) << 2;
80 offset
+= (x
& 0x1) << 1;
85 static GLubyte
*r200_depth_4byte(const struct radeon_renderbuffer
* rrb
,
88 GLubyte
*ptr
= rrb
->bo
->ptr
;
90 if (rrb
->has_surface
) {
91 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
95 b
= (((y
& 0x7ff) >> 4) * (rrb
->pitch
>> 7) + (x
>> 5));
96 offset
+= (b
>> 1) << 12;
97 offset
+= (((rrb
->pitch
>> 7) & 0x1) ? (b
& 0x1) : ((b
& 0x1) ^ ((y
>> 4) & 0x1))) << 11;
98 offset
+= ((y
>> 2) & 0x3) << 9;
99 offset
+= ((x
>> 2) & 0x1) << 8;
100 offset
+= ((x
>> 3) & 0x3) << 6;
101 offset
+= ((y
>> 1) & 0x1) << 5;
102 offset
+= ((x
>> 1) & 0x1) << 4;
103 offset
+= (y
& 0x1) << 3;
104 offset
+= (x
& 0x1) << 2;
/* r600 tiling modes:
 * - 1D (akin to macro-linear/micro-tiled on older asics)
 * - 2D (akin to macro-tiled/micro-tiled on older asics)
 * only 1D tiling is implemented below
 */
116 #if defined(RADEON_R600)
117 static inline GLint
r600_1d_tile_helper(const struct radeon_renderbuffer
* rrb
,
118 GLint x
, GLint y
, GLint is_depth
, GLint is_stencil
)
120 GLint element_bytes
= rrb
->cpp
;
121 GLint num_samples
= 1;
122 GLint tile_width
= 8;
123 GLint tile_height
= 8;
124 GLint tile_thickness
= 1;
125 GLint pitch_elements
= rrb
->pitch
/ element_bytes
;
126 GLint height
= rrb
->base
.Height
;
128 GLint sample_number
= 0;
132 GLint tiles_per_slice
;
134 GLint tile_row_index
;
135 GLint tile_column_index
;
137 GLint pixel_number
= 0;
138 GLint element_offset
;
141 tile_bytes
= tile_width
* tile_height
* tile_thickness
* element_bytes
* num_samples
;
142 tiles_per_row
= pitch_elements
/ tile_width
;
143 tiles_per_slice
= tiles_per_row
* (height
/ tile_height
);
144 slice_offset
= (z
/ tile_thickness
) * tiles_per_slice
* tile_bytes
;
145 tile_row_index
= y
/ tile_height
;
146 tile_column_index
= x
/ tile_width
;
147 tile_offset
= ((tile_row_index
* tiles_per_row
) + tile_column_index
) * tile_bytes
;
150 GLint pixel_offset
= 0;
152 pixel_number
|= ((x
>> 0) & 1) << 0; // pn[0] = x[0]
153 pixel_number
|= ((y
>> 0) & 1) << 1; // pn[1] = y[0]
154 pixel_number
|= ((x
>> 1) & 1) << 2; // pn[2] = x[1]
155 pixel_number
|= ((y
>> 1) & 1) << 3; // pn[3] = y[1]
156 pixel_number
|= ((x
>> 2) & 1) << 4; // pn[4] = x[2]
157 pixel_number
|= ((y
>> 2) & 1) << 5; // pn[5] = y[2]
158 switch (element_bytes
) {
160 pixel_offset
= pixel_number
* element_bytes
* num_samples
;
163 /* stencil and depth data are stored separately within a tile.
164 * stencil is stored in a contiguous tile before the depth tile.
165 * stencil element is 1 byte, depth element is 3 bytes.
166 * stencil tile is 64 bytes.
169 pixel_offset
= pixel_number
* 1 * num_samples
;
171 pixel_offset
= (pixel_number
* 3 * num_samples
) + 64;
174 element_offset
= pixel_offset
+ (sample_number
* element_bytes
);
178 switch (element_bytes
) {
180 pixel_number
|= ((x
>> 0) & 1) << 0; // pn[0] = x[0]
181 pixel_number
|= ((x
>> 1) & 1) << 1; // pn[1] = x[1]
182 pixel_number
|= ((x
>> 2) & 1) << 2; // pn[2] = x[2]
183 pixel_number
|= ((y
>> 1) & 1) << 3; // pn[3] = y[1]
184 pixel_number
|= ((y
>> 0) & 1) << 4; // pn[4] = y[0]
185 pixel_number
|= ((y
>> 2) & 1) << 5; // pn[5] = y[2]
188 pixel_number
|= ((x
>> 0) & 1) << 0; // pn[0] = x[0]
189 pixel_number
|= ((x
>> 1) & 1) << 1; // pn[1] = x[1]
190 pixel_number
|= ((x
>> 2) & 1) << 2; // pn[2] = x[2]
191 pixel_number
|= ((y
>> 0) & 1) << 3; // pn[3] = y[0]
192 pixel_number
|= ((y
>> 1) & 1) << 4; // pn[4] = y[1]
193 pixel_number
|= ((y
>> 2) & 1) << 5; // pn[5] = y[2]
196 pixel_number
|= ((x
>> 0) & 1) << 0; // pn[0] = x[0]
197 pixel_number
|= ((x
>> 1) & 1) << 1; // pn[1] = x[1]
198 pixel_number
|= ((y
>> 0) & 1) << 2; // pn[2] = y[0]
199 pixel_number
|= ((x
>> 2) & 1) << 3; // pn[3] = x[2]
200 pixel_number
|= ((y
>> 1) & 1) << 4; // pn[4] = y[1]
201 pixel_number
|= ((y
>> 2) & 1) << 5; // pn[5] = y[2]
204 sample_offset
= sample_number
* (tile_bytes
/ num_samples
);
205 element_offset
= sample_offset
+ (pixel_number
* element_bytes
);
207 offset
= slice_offset
+ tile_offset
+ element_offset
;
212 static GLubyte
*r600_ptr_depth(const struct radeon_renderbuffer
* rrb
,
215 GLubyte
*ptr
= rrb
->bo
->ptr
;
216 GLint offset
= r600_1d_tile_helper(rrb
, x
, y
, 1, 0);
220 static GLubyte
*r600_ptr_stencil(const struct radeon_renderbuffer
* rrb
,
223 GLubyte
*ptr
= rrb
->bo
->ptr
;
224 GLint offset
= r600_1d_tile_helper(rrb
, x
, y
, 1, 1);
228 static GLubyte
*r600_ptr_color(const struct radeon_renderbuffer
* rrb
,
231 GLubyte
*ptr
= rrb
->bo
->ptr
;
232 uint32_t mask
= RADEON_BO_FLAGS_MACRO_TILE
| RADEON_BO_FLAGS_MICRO_TILE
;
235 if (rrb
->has_surface
|| !(rrb
->bo
->flags
& mask
)) {
236 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
238 offset
= r600_1d_tile_helper(rrb
, x
, y
, 0, 0);
/* radeon tiling on r300-r500 has 4 states,
   macro-linear/micro-linear
   macro-linear/micro-tiled
   macro-tiled /micro-linear
   macro-tiled /micro-tiled

   2 byte surface - two types - we only provide 8x2 microtiling
 */
255 static GLubyte
*radeon_ptr_4byte(const struct radeon_renderbuffer
* rrb
,
258 GLubyte
*ptr
= rrb
->bo
->ptr
;
259 uint32_t mask
= RADEON_BO_FLAGS_MACRO_TILE
| RADEON_BO_FLAGS_MICRO_TILE
;
262 if (rrb
->has_surface
|| !(rrb
->bo
->flags
& mask
)) {
263 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
266 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MACRO_TILE
) {
267 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MICRO_TILE
) {
268 offset
= ((y
>> 4) * (rrb
->pitch
>> 7) + (x
>> 5)) << 11;
269 offset
+= (((y
>> 3) ^ (x
>> 5)) & 0x1) << 10;
270 offset
+= (((y
>> 4) ^ (x
>> 4)) & 0x1) << 9;
271 offset
+= (((y
>> 2) ^ (x
>> 4)) & 0x1) << 8;
272 offset
+= (((y
>> 3) ^ (x
>> 3)) & 0x1) << 7;
273 offset
+= ((y
>> 1) & 0x1) << 6;
274 offset
+= ((x
>> 2) & 0x1) << 5;
275 offset
+= (y
& 1) << 4;
276 offset
+= (x
& 3) << 2;
278 offset
= ((y
>> 3) * (rrb
->pitch
>> 8) + (x
>> 6)) << 11;
279 offset
+= (((y
>> 2) ^ (x
>> 6)) & 0x1) << 10;
280 offset
+= (((y
>> 3) ^ (x
>> 5)) & 0x1) << 9;
281 offset
+= (((y
>> 1) ^ (x
>> 5)) & 0x1) << 8;
282 offset
+= (((y
>> 2) ^ (x
>> 4)) & 0x1) << 7;
283 offset
+= (y
& 1) << 6;
284 offset
+= (x
& 15) << 2;
287 offset
= ((y
>> 1) * (rrb
->pitch
>> 4) + (x
>> 2)) << 5;
288 offset
+= (y
& 1) << 4;
289 offset
+= (x
& 3) << 2;
295 static GLubyte
*radeon_ptr_2byte_8x2(const struct radeon_renderbuffer
* rrb
,
298 GLubyte
*ptr
= rrb
->bo
->ptr
;
299 uint32_t mask
= RADEON_BO_FLAGS_MACRO_TILE
| RADEON_BO_FLAGS_MICRO_TILE
;
302 if (rrb
->has_surface
|| !(rrb
->bo
->flags
& mask
)) {
303 offset
= x
* rrb
->cpp
+ y
* rrb
->pitch
;
306 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MACRO_TILE
) {
307 if (rrb
->bo
->flags
& RADEON_BO_FLAGS_MICRO_TILE
) {
308 offset
= ((y
>> 4) * (rrb
->pitch
>> 7) + (x
>> 6)) << 11;
309 offset
+= (((y
>> 3) ^ (x
>> 6)) & 0x1) << 10;
310 offset
+= (((y
>> 4) ^ (x
>> 5)) & 0x1) << 9;
311 offset
+= (((y
>> 2) ^ (x
>> 5)) & 0x1) << 8;
312 offset
+= (((y
>> 3) ^ (x
>> 4)) & 0x1) << 7;
313 offset
+= ((y
>> 1) & 0x1) << 6;
314 offset
+= ((x
>> 3) & 0x1) << 5;
315 offset
+= (y
& 1) << 4;
316 offset
+= (x
& 3) << 2;
318 offset
= ((y
>> 3) * (rrb
->pitch
>> 8) + (x
>> 7)) << 11;
319 offset
+= (((y
>> 2) ^ (x
>> 7)) & 0x1) << 10;
320 offset
+= (((y
>> 3) ^ (x
>> 6)) & 0x1) << 9;
321 offset
+= (((y
>> 1) ^ (x
>> 6)) & 0x1) << 8;
322 offset
+= (((y
>> 2) ^ (x
>> 5)) & 0x1) << 7;
323 offset
+= (y
& 1) << 6;
324 offset
+= ((x
>> 4) & 0x1) << 5;
325 offset
+= (x
& 15) << 2;
328 offset
= ((y
>> 1) * (rrb
->pitch
>> 4) + (x
>> 3)) << 5;
329 offset
+= (y
& 0x1) << 4;
330 offset
+= (x
& 0x7) << 1;
339 * Note that all information needed to access pixels in a renderbuffer
340 * should be obtained through the gl_renderbuffer parameter, not per-context
344 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
345 struct radeon_renderbuffer *rrb = (void *) rb; \
346 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
347 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
348 unsigned int num_cliprects; \
349 struct drm_clip_rect *cliprects; \
353 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
355 #define LOCAL_DEPTH_VARS \
356 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
357 struct radeon_renderbuffer *rrb = (void *) rb; \
358 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
359 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
360 unsigned int num_cliprects; \
361 struct drm_clip_rect *cliprects; \
363 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
365 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
367 #define Y_FLIP(_y) ((_y) * yScale + yBias)
373 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
374 * the cliprect info from the context, not the driDrawable.
375 * Move this into spantmp2.h someday.
377 #define HW_CLIPLOOP() \
379 int _nc = num_cliprects; \
381 int minx = cliprects[_nc].x1 - x_off; \
382 int miny = cliprects[_nc].y1 - y_off; \
383 int maxx = cliprects[_nc].x2 - x_off; \
384 int maxy = cliprects[_nc].y2 - y_off;
386 /* ================================================================
390 /* 16 bit, RGB565 color spanline and pixel functions
392 #define SPANTMP_PIXEL_FMT GL_RGB
393 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
395 #define TAG(x) radeon##x##_RGB565
396 #define TAG2(x,y) radeon##x##_RGB565##y
397 #if defined(RADEON_R600)
398 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
400 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
402 #include "spantmp2.h"
404 #define SPANTMP_PIXEL_FMT GL_RGB
405 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV
407 #define TAG(x) radeon##x##_RGB565_REV
408 #define TAG2(x,y) radeon##x##_RGB565_REV##y
409 #if defined(RADEON_R600)
410 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
412 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
414 #include "spantmp2.h"
416 /* 16 bit, ARGB1555 color spanline and pixel functions
418 #define SPANTMP_PIXEL_FMT GL_BGRA
419 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
421 #define TAG(x) radeon##x##_ARGB1555
422 #define TAG2(x,y) radeon##x##_ARGB1555##y
423 #if defined(RADEON_R600)
424 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
426 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
428 #include "spantmp2.h"
430 #define SPANTMP_PIXEL_FMT GL_BGRA
431 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5
433 #define TAG(x) radeon##x##_ARGB1555_REV
434 #define TAG2(x,y) radeon##x##_ARGB1555_REV##y
435 #if defined(RADEON_R600)
436 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
438 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
440 #include "spantmp2.h"
442 /* 16 bit, RGBA4 color spanline and pixel functions
444 #define SPANTMP_PIXEL_FMT GL_BGRA
445 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
447 #define TAG(x) radeon##x##_ARGB4444
448 #define TAG2(x,y) radeon##x##_ARGB4444##y
449 #if defined(RADEON_R600)
450 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
452 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
454 #include "spantmp2.h"
456 #define SPANTMP_PIXEL_FMT GL_BGRA
457 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4
459 #define TAG(x) radeon##x##_ARGB4444_REV
460 #define TAG2(x,y) radeon##x##_ARGB4444_REV##y
461 #if defined(RADEON_R600)
462 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
464 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
466 #include "spantmp2.h"
468 /* 32 bit, xRGB8888 color spanline and pixel functions
470 #define SPANTMP_PIXEL_FMT GL_BGRA
471 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
473 #define TAG(x) radeon##x##_xRGB8888
474 #define TAG2(x,y) radeon##x##_xRGB8888##y
475 #if defined(RADEON_R600)
476 #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
477 #define PUT_VALUE(_x, _y, d) { \
478 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
482 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
483 #define PUT_VALUE(_x, _y, d) { \
484 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
488 #include "spantmp2.h"
490 /* 32 bit, ARGB8888 color spanline and pixel functions
492 #define SPANTMP_PIXEL_FMT GL_BGRA
493 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
495 #define TAG(x) radeon##x##_ARGB8888
496 #define TAG2(x,y) radeon##x##_ARGB8888##y
497 #if defined(RADEON_R600)
498 #define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
499 #define PUT_VALUE(_x, _y, d) { \
500 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
504 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
505 #define PUT_VALUE(_x, _y, d) { \
506 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
510 #include "spantmp2.h"
512 /* 32 bit, BGRx8888 color spanline and pixel functions
514 #define SPANTMP_PIXEL_FMT GL_BGRA
515 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
517 #define TAG(x) radeon##x##_BGRx8888
518 #define TAG2(x,y) radeon##x##_BGRx8888##y
519 #if defined(RADEON_R600)
520 #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
521 #define PUT_VALUE(_x, _y, d) { \
522 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
526 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
527 #define PUT_VALUE(_x, _y, d) { \
528 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
532 #include "spantmp2.h"
534 /* 32 bit, BGRA8888 color spanline and pixel functions
536 #define SPANTMP_PIXEL_FMT GL_BGRA
537 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
539 #define TAG(x) radeon##x##_BGRA8888
540 #define TAG2(x,y) radeon##x##_BGRA8888##y
541 #if defined(RADEON_R600)
542 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
544 #define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off)
546 #include "spantmp2.h"
548 /* ================================================================
552 /* The Radeon family has depth tiling on all the time, so we have to convert
553 * the x,y coordinates into the memory bus address (mba) in the same
554 * manner as the engine. In each case, the linear block address (ba)
555 * is calculated, and then wired with x and y to produce the final
557 * The chip will do address translation on its own if the surface registers
558 * are set up correctly. It is not quite enough to get it working with hyperz
562 /* 16-bit depth buffer functions
564 #define VALUE_TYPE GLushort
566 #if defined(RADEON_R200)
567 #define WRITE_DEPTH( _x, _y, d ) \
568 *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
569 #elif defined(RADEON_R600)
570 #define WRITE_DEPTH( _x, _y, d ) \
571 *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
573 #define WRITE_DEPTH( _x, _y, d ) \
574 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
577 #if defined(RADEON_R200)
578 #define READ_DEPTH( d, _x, _y ) \
579 d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
580 #elif defined(RADEON_R600)
581 #define READ_DEPTH( d, _x, _y ) \
582 d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
584 #define READ_DEPTH( d, _x, _y ) \
585 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
588 #define TAG(x) radeon##x##_z16
589 #include "depthtmp.h"
593 * Careful: It looks like the R300 uses ZZZS byte order while the R200
594 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
596 #define VALUE_TYPE GLuint
598 #if defined(RADEON_R300)
599 #define WRITE_DEPTH( _x, _y, d ) \
601 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
602 GLuint tmp = LE32_TO_CPU(*_ptr); \
604 tmp |= ((d << 8) & 0xffffff00); \
605 *_ptr = CPU_TO_LE32(tmp); \
607 #elif defined(RADEON_R600)
608 #define WRITE_DEPTH( _x, _y, d ) \
610 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
611 GLuint tmp = *_ptr; \
613 tmp |= ((d) & 0x00ffffff); \
616 #elif defined(RADEON_R200)
617 #define WRITE_DEPTH( _x, _y, d ) \
619 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
620 GLuint tmp = LE32_TO_CPU(*_ptr); \
622 tmp |= ((d) & 0x00ffffff); \
623 *_ptr = CPU_TO_LE32(tmp); \
626 #define WRITE_DEPTH( _x, _y, d ) \
628 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
629 GLuint tmp = LE32_TO_CPU(*_ptr); \
631 tmp |= ((d) & 0x00ffffff); \
632 *_ptr = CPU_TO_LE32(tmp); \
636 #if defined(RADEON_R300)
637 #define READ_DEPTH( d, _x, _y ) \
639 d = (LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0xffffff00) >> 8; \
641 #elif defined(RADEON_R600)
642 #define READ_DEPTH( d, _x, _y ) \
644 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
646 #elif defined(RADEON_R200)
647 #define READ_DEPTH( d, _x, _y ) \
649 d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
652 #define READ_DEPTH( d, _x, _y ) \
653 d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff;
656 #define TAG(x) radeon##x##_z24
657 #include "depthtmp.h"
659 /* 24 bit depth, 8 bit stencil depthbuffer functions
662 * Careful: It looks like the R300 uses ZZZS byte order while the R200
663 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
665 #define VALUE_TYPE GLuint
667 #if defined(RADEON_R300)
668 #define WRITE_DEPTH( _x, _y, d ) \
670 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
671 *_ptr = CPU_TO_LE32((((d) & 0xff000000) >> 24) | (((d) & 0x00ffffff) << 8)); \
673 #elif defined(RADEON_R600)
674 #define WRITE_DEPTH( _x, _y, d ) \
676 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
677 GLuint tmp = *_ptr; \
679 tmp |= ((d) & 0x00ffffff); \
681 _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
684 tmp |= ((d) >> 24) & 0xff; \
687 #elif defined(RADEON_R200)
688 #define WRITE_DEPTH( _x, _y, d ) \
690 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
691 *_ptr = CPU_TO_LE32(d); \
694 #define WRITE_DEPTH( _x, _y, d ) \
696 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
697 *_ptr = CPU_TO_LE32(d); \
701 #if defined(RADEON_R300)
702 #define READ_DEPTH( d, _x, _y ) \
704 GLuint tmp = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
705 d = LE32_TO_CPU(((tmp & 0x000000ff) << 24) | ((tmp & 0xffffff00) >> 8)); \
707 #elif defined(RADEON_R600)
708 #define READ_DEPTH( d, _x, _y ) \
710 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
711 d |= ((*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000; \
713 #elif defined(RADEON_R200)
714 #define READ_DEPTH( d, _x, _y ) \
716 d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
719 #define READ_DEPTH( d, _x, _y ) do { \
720 d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
724 #define TAG(x) radeon##x##_s8_z24
725 #include "depthtmp.h"
727 /* ================================================================
731 /* 24 bit depth, 8 bit stencil depthbuffer functions
734 #define WRITE_STENCIL( _x, _y, d ) \
736 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
737 GLuint tmp = LE32_TO_CPU(*_ptr); \
740 *_ptr = CPU_TO_LE32(tmp); \
742 #elif defined(RADEON_R600)
743 #define WRITE_STENCIL( _x, _y, d ) \
745 GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
746 GLuint tmp = *_ptr; \
751 #elif defined(RADEON_R200)
752 #define WRITE_STENCIL( _x, _y, d ) \
754 GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
755 GLuint tmp = LE32_TO_CPU(*_ptr); \
757 tmp |= (((d) & 0xff) << 24); \
758 *_ptr = CPU_TO_LE32(tmp); \
761 #define WRITE_STENCIL( _x, _y, d ) \
763 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
764 GLuint tmp = LE32_TO_CPU(*_ptr); \
766 tmp |= (((d) & 0xff) << 24); \
767 *_ptr = CPU_TO_LE32(tmp); \
772 #define READ_STENCIL( d, _x, _y ) \
774 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
775 GLuint tmp = LE32_TO_CPU(*_ptr); \
776 d = tmp & 0x000000ff; \
778 #elif defined(RADEON_R600)
779 #define READ_STENCIL( d, _x, _y ) \
781 GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
782 GLuint tmp = *_ptr; \
783 d = tmp & 0x000000ff; \
785 #elif defined(RADEON_R200)
786 #define READ_STENCIL( d, _x, _y ) \
788 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
789 GLuint tmp = LE32_TO_CPU(*_ptr); \
790 d = (tmp & 0xff000000) >> 24; \
793 #define READ_STENCIL( d, _x, _y ) \
795 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
796 GLuint tmp = LE32_TO_CPU(*_ptr); \
797 d = (tmp & 0xff000000) >> 24; \
801 #define TAG(x) radeon##x##_s8_z24
802 #include "stenciltmp.h"
805 static void map_unmap_rb(struct gl_renderbuffer
*rb
, int flag
)
807 struct radeon_renderbuffer
*rrb
= radeon_renderbuffer(rb
);
810 if (rrb
== NULL
|| !rrb
->bo
)
813 radeon_print(RADEON_MEMORY
, RADEON_TRACE
,
814 "%s( rb %p, flag %s )\n",
815 __func__
, rb
, flag
? "true":"false");
818 radeon_bo_wait(rrb
->bo
);
819 r
= radeon_bo_map(rrb
->bo
, 1);
821 fprintf(stderr
, "(%s) error(%d) mapping buffer.\n",
825 radeonSetSpanFunctions(rrb
);
827 radeon_bo_unmap(rrb
->bo
);
834 radeon_map_unmap_framebuffer(GLcontext
*ctx
, struct gl_framebuffer
*fb
,
839 radeon_print(RADEON_MEMORY
, RADEON_TRACE
,
840 "%s( %p , fb %p, map %s )\n",
841 __func__
, ctx
, fb
, map
? "true":"false");
843 /* color draw buffers */
844 for (j
= 0; j
< ctx
->DrawBuffer
->_NumColorDrawBuffers
; j
++)
845 map_unmap_rb(fb
->_ColorDrawBuffers
[j
], map
);
847 map_unmap_rb(fb
->_ColorReadBuffer
, map
);
849 /* check for render to textures */
850 for (i
= 0; i
< BUFFER_COUNT
; i
++) {
851 struct gl_renderbuffer_attachment
*att
=
853 struct gl_texture_object
*tex
= att
->Texture
;
855 /* Render to texture. Note that a mipmapped texture need not
856 * be complete for render to texture, so we must restrict to
857 * mapping only the attached image.
859 radeon_texture_image
*image
= get_radeon_texture_image(tex
->Image
[att
->CubeMapFace
][att
->TextureLevel
]);
860 ASSERT(att
->Renderbuffer
);
863 radeon_teximage_map(image
, GL_TRUE
);
865 radeon_teximage_unmap(image
);
869 /* depth buffer (Note wrapper!) */
870 if (fb
->_DepthBuffer
)
871 map_unmap_rb(fb
->_DepthBuffer
->Wrapped
, map
);
873 if (fb
->_StencilBuffer
)
874 map_unmap_rb(fb
->_StencilBuffer
->Wrapped
, map
);
876 radeon_check_front_buffer_rendering(ctx
);
879 static void radeonSpanRenderStart(GLcontext
* ctx
)
881 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
884 radeon_firevertices(rmesa
);
886 /* The locking and wait for idle should really only be needed in classic mode.
887 * In a future memory manager based implementation, this should become
888 * unnecessary due to the fact that mapping our buffers, textures, etc.
889 * should implicitly wait for any previous rendering commands that must
891 if (!rmesa
->radeonScreen
->driScreen
->dri2
.enabled
) {
892 LOCK_HARDWARE(rmesa
);
893 radeonWaitForIdleLocked(rmesa
);
896 for (i
= 0; i
< ctx
->Const
.MaxTextureImageUnits
; i
++) {
897 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
)
898 ctx
->Driver
.MapTexture(ctx
, ctx
->Texture
.Unit
[i
]._Current
);
901 radeon_map_unmap_framebuffer(ctx
, ctx
->DrawBuffer
, GL_TRUE
);
902 if (ctx
->ReadBuffer
!= ctx
->DrawBuffer
)
903 radeon_map_unmap_framebuffer(ctx
, ctx
->ReadBuffer
, GL_TRUE
);
906 static void radeonSpanRenderFinish(GLcontext
* ctx
)
908 radeonContextPtr rmesa
= RADEON_CONTEXT(ctx
);
913 for (i
= 0; i
< ctx
->Const
.MaxTextureImageUnits
; i
++) {
914 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
)
915 ctx
->Driver
.UnmapTexture(ctx
, ctx
->Texture
.Unit
[i
]._Current
);
918 radeon_map_unmap_framebuffer(ctx
, ctx
->DrawBuffer
, GL_FALSE
);
919 if (ctx
->ReadBuffer
!= ctx
->DrawBuffer
)
920 radeon_map_unmap_framebuffer(ctx
, ctx
->ReadBuffer
, GL_FALSE
);
922 if (!rmesa
->radeonScreen
->driScreen
->dri2
.enabled
) {
923 UNLOCK_HARDWARE(rmesa
);
927 void radeonInitSpanFuncs(GLcontext
* ctx
)
929 struct swrast_device_driver
*swdd
=
930 _swrast_GetDeviceDriverReference(ctx
);
931 swdd
->SpanRenderStart
= radeonSpanRenderStart
;
932 swdd
->SpanRenderFinish
= radeonSpanRenderFinish
;
936 * Plug in the Get/Put routines for the given driRenderbuffer.
938 static void radeonSetSpanFunctions(struct radeon_renderbuffer
*rrb
)
940 if (rrb
->base
.Format
== MESA_FORMAT_RGB565
) {
941 radeonInitPointers_RGB565(&rrb
->base
);
942 } else if (rrb
->base
.Format
== MESA_FORMAT_RGB565_REV
) {
943 radeonInitPointers_RGB565_REV(&rrb
->base
);
944 } else if (rrb
->base
.Format
== MESA_FORMAT_XRGB8888
) {
945 radeonInitPointers_xRGB8888(&rrb
->base
);
946 } else if (rrb
->base
.Format
== MESA_FORMAT_XRGB8888_REV
) {
947 radeonInitPointers_BGRx8888(&rrb
->base
);
948 } else if (rrb
->base
.Format
== MESA_FORMAT_ARGB8888
) {
949 radeonInitPointers_ARGB8888(&rrb
->base
);
950 } else if (rrb
->base
.Format
== MESA_FORMAT_ARGB8888_REV
) {
951 radeonInitPointers_BGRA8888(&rrb
->base
);
952 } else if (rrb
->base
.Format
== MESA_FORMAT_ARGB4444
) {
953 radeonInitPointers_ARGB4444(&rrb
->base
);
954 } else if (rrb
->base
.Format
== MESA_FORMAT_ARGB4444_REV
) {
955 radeonInitPointers_ARGB4444_REV(&rrb
->base
);
956 } else if (rrb
->base
.Format
== MESA_FORMAT_ARGB1555
) {
957 radeonInitPointers_ARGB1555(&rrb
->base
);
958 } else if (rrb
->base
.Format
== MESA_FORMAT_ARGB1555_REV
) {
959 radeonInitPointers_ARGB1555_REV(&rrb
->base
);
960 } else if (rrb
->base
.Format
== MESA_FORMAT_Z16
) {
961 radeonInitDepthPointers_z16(&rrb
->base
);
962 } else if (rrb
->base
.Format
== MESA_FORMAT_X8_Z24
) {
963 radeonInitDepthPointers_z24(&rrb
->base
);
964 } else if (rrb
->base
.Format
== MESA_FORMAT_S8_Z24
) {
965 radeonInitDepthPointers_s8_z24(&rrb
->base
);
966 } else if (rrb
->base
.Format
== MESA_FORMAT_S8
) {
967 radeonInitStencilPointers_s8_z24(&rrb
->base
);
969 fprintf(stderr
, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb
->base
.Format
);