r100/r200: fix Y coord flipping in accelerated blits
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
1 /**************************************************************************
2
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
6
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
10
11 All Rights Reserved.
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
20
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
24
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 **************************************************************************/
34
35 /*
36 * Authors:
37 * Kevin E. Martin <martin@valinux.com>
38 * Gareth Hughes <gareth@valinux.com>
39 * Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43 #include "main/glheader.h"
44 #include "main/texformat.h"
45 #include "swrast/swrast.h"
46
47 #include "radeon_common.h"
48 #include "radeon_lock.h"
49 #include "radeon_span.h"
50
51 #define DBG 0
52
53 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
54
55
/* The r200 depth buffer is always tiled.  The helpers below implement the
 * address formula from the hardware documentation (barring any
 * transcription errors).
 */
59 #if defined(RADEON_R200)
60 static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
61 GLint x, GLint y)
62 {
63 GLubyte *ptr = rrb->bo->ptr;
64 GLint offset;
65 if (rrb->has_surface) {
66 offset = x * rrb->cpp + y * rrb->pitch;
67 } else {
68 GLuint b;
69 offset = 0;
70 b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6)));
71 offset += (b >> 1) << 12;
72 offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
73 offset += ((y >> 2) & 0x3) << 9;
74 offset += ((x >> 3) & 0x1) << 8;
75 offset += ((x >> 4) & 0x3) << 6;
76 offset += ((x >> 2) & 0x1) << 5;
77 offset += ((y >> 1) & 0x1) << 4;
78 offset += ((x >> 1) & 0x1) << 3;
79 offset += (y & 0x1) << 2;
80 offset += (x & 0x1) << 1;
81 }
82 return &ptr[offset];
83 }
84
85 static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
86 GLint x, GLint y)
87 {
88 GLubyte *ptr = rrb->bo->ptr;
89 GLint offset;
90 if (rrb->has_surface) {
91 offset = x * rrb->cpp + y * rrb->pitch;
92 } else {
93 GLuint b;
94 offset = 0;
95 b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
96 offset += (b >> 1) << 12;
97 offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
98 offset += ((y >> 2) & 0x3) << 9;
99 offset += ((x >> 2) & 0x1) << 8;
100 offset += ((x >> 3) & 0x3) << 6;
101 offset += ((y >> 1) & 0x1) << 5;
102 offset += ((x >> 1) & 0x1) << 4;
103 offset += (y & 0x1) << 3;
104 offset += (x & 0x1) << 2;
105 }
106 return &ptr[offset];
107 }
108 #endif
109
110 /* r600 tiling
111 * two main types:
112 * - 1D (akin to macro-linear/micro-tiled on older asics)
113 * - 2D (akin to macro-tiled/micro-tiled on older asics)
114 * only 1D tiling is implemented below
115 */
116 #if defined(RADEON_R600)
117 static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
118 GLint x, GLint y, GLint is_depth, GLint is_stencil)
119 {
120 GLint element_bytes = rrb->cpp;
121 GLint num_samples = 1;
122 GLint tile_width = 8;
123 GLint tile_height = 8;
124 GLint tile_thickness = 1;
125 GLint pitch_elements = rrb->pitch / element_bytes;
126 GLint height = rrb->base.Height;
127 GLint z = 0;
128 GLint sample_number = 0;
129 /* */
130 GLint tile_bytes;
131 GLint tiles_per_row;
132 GLint tiles_per_slice;
133 GLint slice_offset;
134 GLint tile_row_index;
135 GLint tile_column_index;
136 GLint tile_offset;
137 GLint pixel_number = 0;
138 GLint element_offset;
139 GLint offset = 0;
140
141 tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
142 tiles_per_row = pitch_elements / tile_width;
143 tiles_per_slice = tiles_per_row * (height / tile_height);
144 slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
145 tile_row_index = y / tile_height;
146 tile_column_index = x / tile_width;
147 tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
148
149 if (is_depth) {
150 GLint pixel_offset = 0;
151
152 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
153 pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
154 pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
155 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
156 pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
157 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
158 switch (element_bytes) {
159 case 2:
160 pixel_offset = pixel_number * element_bytes * num_samples;
161 break;
162 case 4:
163 /* stencil and depth data are stored separately within a tile.
164 * stencil is stored in a contiguous tile before the depth tile.
165 * stencil element is 1 byte, depth element is 3 bytes.
166 * stencil tile is 64 bytes.
167 */
168 if (is_stencil)
169 pixel_offset = pixel_number * 1 * num_samples;
170 else
171 pixel_offset = (pixel_number * 3 * num_samples) + 64;
172 break;
173 }
174 element_offset = pixel_offset + (sample_number * element_bytes);
175 } else {
176 GLint sample_offset;
177
178 switch (element_bytes) {
179 case 1:
180 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
181 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
182 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
183 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
184 pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
185 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
186 break;
187 case 2:
188 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
189 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
190 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
191 pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
192 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
193 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
194 break;
195 case 4:
196 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
197 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
198 pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
199 pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
200 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
201 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
202 break;
203 }
204 sample_offset = sample_number * (tile_bytes / num_samples);
205 element_offset = sample_offset + (pixel_number * element_bytes);
206 }
207 offset = slice_offset + tile_offset + element_offset;
208 return offset;
209 }
210
211 /* depth buffers */
212 static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
213 GLint x, GLint y)
214 {
215 GLubyte *ptr = rrb->bo->ptr;
216 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
217 return &ptr[offset];
218 }
219
220 static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
221 GLint x, GLint y)
222 {
223 GLubyte *ptr = rrb->bo->ptr;
224 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
225 return &ptr[offset];
226 }
227
228 static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
229 GLint x, GLint y)
230 {
231 GLubyte *ptr = rrb->bo->ptr;
232 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
233 GLint offset;
234
235 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
236 offset = x * rrb->cpp + y * rrb->pitch;
237 } else {
238 offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
239 }
240 return &ptr[offset];
241 }
242
243 #else
244
245 /* radeon tiling on r300-r500 has 4 states,
246 macro-linear/micro-linear
247 macro-linear/micro-tiled
248 macro-tiled /micro-linear
249 macro-tiled /micro-tiled
250 1 byte surface
251 2 byte surface - two types - we only provide 8x2 microtiling
252 4 byte surface
253 8/16 byte (unused)
254 */
255 static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
256 GLint x, GLint y)
257 {
258 GLubyte *ptr = rrb->bo->ptr;
259 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
260 GLint offset;
261
262 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
263 offset = x * rrb->cpp + y * rrb->pitch;
264 } else {
265 offset = 0;
266 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
267 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
268 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
269 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
270 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
271 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
272 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
273 offset += ((y >> 1) & 0x1) << 6;
274 offset += ((x >> 2) & 0x1) << 5;
275 offset += (y & 1) << 4;
276 offset += (x & 3) << 2;
277 } else {
278 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
279 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
280 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
281 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
282 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
283 offset += (y & 1) << 6;
284 offset += (x & 15) << 2;
285 }
286 } else {
287 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
288 offset += (y & 1) << 4;
289 offset += (x & 3) << 2;
290 }
291 }
292 return &ptr[offset];
293 }
294
295 static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
296 GLint x, GLint y)
297 {
298 GLubyte *ptr = rrb->bo->ptr;
299 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
300 GLint offset;
301
302 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
303 offset = x * rrb->cpp + y * rrb->pitch;
304 } else {
305 offset = 0;
306 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
307 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
308 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
309 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
310 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
311 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
312 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
313 offset += ((y >> 1) & 0x1) << 6;
314 offset += ((x >> 3) & 0x1) << 5;
315 offset += (y & 1) << 4;
316 offset += (x & 3) << 2;
317 } else {
318 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
319 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
320 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
321 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
322 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
323 offset += (y & 1) << 6;
324 offset += ((x >> 4) & 0x1) << 5;
325 offset += (x & 15) << 2;
326 }
327 } else {
328 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
329 offset += (y & 0x1) << 4;
330 offset += (x & 0x7) << 1;
331 }
332 }
333 return &ptr[offset];
334 }
335
336 #endif
337
338 /*
339 * Note that all information needed to access pixels in a renderbuffer
340 * should be obtained through the gl_renderbuffer parameter, not per-context
341 * information.
342 */
/* Declarations shared by every span function: the driver context, the
 * radeon renderbuffer, the Y flip parameters (window-system framebuffers,
 * i.e. DrawBuffer->Name == 0, are flipped) and the cliprect state.
 */
#define RADEON_SPAN_COMMON_VARS						\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;			\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;

/* Fills in the cliprect list and the drawable offset declared above. */
#define RADEON_SPAN_FETCH_CLIPRECTS					\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Color span functions additionally get a scratch pixel value `p`. */
#define LOCAL_VARS							\
   RADEON_SPAN_COMMON_VARS						\
   GLuint p;								\
   (void)p;								\
   RADEON_SPAN_FETCH_CLIPRECTS

#define LOCAL_DEPTH_VARS						\
   RADEON_SPAN_COMMON_VARS						\
   RADEON_SPAN_FETCH_CLIPRECTS

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Map a GL row to a buffer row using yScale/yBias from the local vars. */
#define Y_FLIP(_y) ((_y) * yScale + yBias)

/* No hardware locking is done per span function. */
#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except that the
 * cliprect info comes from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 *
 * Opens a loop over all cliprects (last to first); the loop is left open
 * here, so the matching close must come from elsewhere.
 */
#define HW_CLIPLOOP()							\
   do {									\
      int _nc = num_cliprects;						\
      while ( _nc-- ) {							\
	 int minx = cliprects[_nc].x1 - x_off;				\
	 int miny = cliprects[_nc].y1 - y_off;				\
	 int maxx = cliprects[_nc].x2 - x_off;				\
	 int maxy = cliprects[_nc].y2 - y_off;
385
386 /* ================================================================
387 * Color buffer
388 */
389
390 /* 16 bit, RGB565 color spanline and pixel functions
391 */
392 #define SPANTMP_PIXEL_FMT GL_RGB
393 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
394
395 #define TAG(x) radeon##x##_RGB565
396 #define TAG2(x,y) radeon##x##_RGB565##y
397 #if defined(RADEON_R600)
398 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
399 #else
400 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
401 #endif
402 #include "spantmp2.h"
403
404 #define SPANTMP_PIXEL_FMT GL_RGB
405 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV
406
407 #define TAG(x) radeon##x##_RGB565_REV
408 #define TAG2(x,y) radeon##x##_RGB565_REV##y
409 #if defined(RADEON_R600)
410 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
411 #else
412 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
413 #endif
414 #include "spantmp2.h"
415
416 /* 16 bit, ARGB1555 color spanline and pixel functions
417 */
418 #define SPANTMP_PIXEL_FMT GL_BGRA
419 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
420
421 #define TAG(x) radeon##x##_ARGB1555
422 #define TAG2(x,y) radeon##x##_ARGB1555##y
423 #if defined(RADEON_R600)
424 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
425 #else
426 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
427 #endif
428 #include "spantmp2.h"
429
430 #define SPANTMP_PIXEL_FMT GL_BGRA
431 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5
432
433 #define TAG(x) radeon##x##_ARGB1555_REV
434 #define TAG2(x,y) radeon##x##_ARGB1555_REV##y
435 #if defined(RADEON_R600)
436 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
437 #else
438 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
439 #endif
440 #include "spantmp2.h"
441
442 /* 16 bit, RGBA4 color spanline and pixel functions
443 */
444 #define SPANTMP_PIXEL_FMT GL_BGRA
445 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
446
447 #define TAG(x) radeon##x##_ARGB4444
448 #define TAG2(x,y) radeon##x##_ARGB4444##y
449 #if defined(RADEON_R600)
450 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
451 #else
452 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
453 #endif
454 #include "spantmp2.h"
455
456 #define SPANTMP_PIXEL_FMT GL_BGRA
457 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4
458
459 #define TAG(x) radeon##x##_ARGB4444_REV
460 #define TAG2(x,y) radeon##x##_ARGB4444_REV##y
461 #if defined(RADEON_R600)
462 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
463 #else
464 #define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
465 #endif
466 #include "spantmp2.h"
467
468 /* 32 bit, xRGB8888 color spanline and pixel functions
469 */
470 #define SPANTMP_PIXEL_FMT GL_BGRA
471 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
472
473 #define TAG(x) radeon##x##_xRGB8888
474 #define TAG2(x,y) radeon##x##_xRGB8888##y
475 #if defined(RADEON_R600)
476 #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
477 #define PUT_VALUE(_x, _y, d) { \
478 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
479 *_ptr = d; \
480 } while (0)
481 #else
482 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
483 #define PUT_VALUE(_x, _y, d) { \
484 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
485 *_ptr = d; \
486 } while (0)
487 #endif
488 #include "spantmp2.h"
489
490 /* 32 bit, ARGB8888 color spanline and pixel functions
491 */
492 #define SPANTMP_PIXEL_FMT GL_BGRA
493 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
494
495 #define TAG(x) radeon##x##_ARGB8888
496 #define TAG2(x,y) radeon##x##_ARGB8888##y
497 #if defined(RADEON_R600)
498 #define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
499 #define PUT_VALUE(_x, _y, d) { \
500 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
501 *_ptr = d; \
502 } while (0)
503 #else
504 #define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
505 #define PUT_VALUE(_x, _y, d) { \
506 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
507 *_ptr = d; \
508 } while (0)
509 #endif
510 #include "spantmp2.h"
511
512 /* 32 bit, BGRx8888 color spanline and pixel functions
513 */
514 #define SPANTMP_PIXEL_FMT GL_BGRA
515 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
516
517 #define TAG(x) radeon##x##_BGRx8888
518 #define TAG2(x,y) radeon##x##_BGRx8888##y
519 #if defined(RADEON_R600)
520 #define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
521 #define PUT_VALUE(_x, _y, d) { \
522 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
523 *_ptr = d; \
524 } while (0)
525 #else
526 #define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
527 #define PUT_VALUE(_x, _y, d) { \
528 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
529 *_ptr = d; \
530 } while (0)
531 #endif
532 #include "spantmp2.h"
533
534 /* 32 bit, BGRA8888 color spanline and pixel functions
535 */
536 #define SPANTMP_PIXEL_FMT GL_BGRA
537 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
538
539 #define TAG(x) radeon##x##_BGRA8888
540 #define TAG2(x,y) radeon##x##_BGRA8888##y
541 #if defined(RADEON_R600)
542 #define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
543 #else
544 #define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off)
545 #endif
546 #include "spantmp2.h"
547
548 /* ================================================================
549 * Depth buffer
550 */
551
552 /* The Radeon family has depth tiling on all the time, so we have to convert
553 * the x,y coordinates into the memory bus address (mba) in the same
554 * manner as the engine. In each case, the linear block address (ba)
555 * is calculated, and then wired with x and y to produce the final
556 * memory address.
557 * The chip will do address translation on its own if the surface registers
558 * are set up correctly. It is not quite enough to get it working with hyperz
559 * too...
560 */
561
562 /* 16-bit depth buffer functions
563 */
564 #define VALUE_TYPE GLushort
565
566 #if defined(RADEON_R200)
567 #define WRITE_DEPTH( _x, _y, d ) \
568 *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
569 #elif defined(RADEON_R600)
570 #define WRITE_DEPTH( _x, _y, d ) \
571 *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
572 #else
573 #define WRITE_DEPTH( _x, _y, d ) \
574 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
575 #endif
576
577 #if defined(RADEON_R200)
578 #define READ_DEPTH( d, _x, _y ) \
579 d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
580 #elif defined(RADEON_R600)
581 #define READ_DEPTH( d, _x, _y ) \
582 d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
583 #else
584 #define READ_DEPTH( d, _x, _y ) \
585 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
586 #endif
587
588 #define TAG(x) radeon##x##_z16
589 #include "depthtmp.h"
590
591 /* 24 bit depth
592 *
593 * Careful: It looks like the R300 uses ZZZS byte order while the R200
594 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
595 */
596 #define VALUE_TYPE GLuint
597
598 #if defined(RADEON_R300)
599 #define WRITE_DEPTH( _x, _y, d ) \
600 do { \
601 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
602 GLuint tmp = LE32_TO_CPU(*_ptr); \
603 tmp &= 0x000000ff; \
604 tmp |= ((d << 8) & 0xffffff00); \
605 *_ptr = CPU_TO_LE32(tmp); \
606 } while (0)
607 #elif defined(RADEON_R600)
608 #define WRITE_DEPTH( _x, _y, d ) \
609 do { \
610 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
611 GLuint tmp = *_ptr; \
612 tmp &= 0xff000000; \
613 tmp |= ((d) & 0x00ffffff); \
614 *_ptr = tmp; \
615 } while (0)
616 #elif defined(RADEON_R200)
617 #define WRITE_DEPTH( _x, _y, d ) \
618 do { \
619 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
620 GLuint tmp = LE32_TO_CPU(*_ptr); \
621 tmp &= 0xff000000; \
622 tmp |= ((d) & 0x00ffffff); \
623 *_ptr = CPU_TO_LE32(tmp); \
624 } while (0)
625 #else
626 #define WRITE_DEPTH( _x, _y, d ) \
627 do { \
628 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
629 GLuint tmp = LE32_TO_CPU(*_ptr); \
630 tmp &= 0xff000000; \
631 tmp |= ((d) & 0x00ffffff); \
632 *_ptr = CPU_TO_LE32(tmp); \
633 } while (0)
634 #endif
635
636 #if defined(RADEON_R300)
637 #define READ_DEPTH( d, _x, _y ) \
638 do { \
639 d = (LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0xffffff00) >> 8; \
640 }while(0)
641 #elif defined(RADEON_R600)
642 #define READ_DEPTH( d, _x, _y ) \
643 do { \
644 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
645 }while(0)
646 #elif defined(RADEON_R200)
647 #define READ_DEPTH( d, _x, _y ) \
648 do { \
649 d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
650 }while(0)
651 #else
652 #define READ_DEPTH( d, _x, _y ) \
653 d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff;
654 #endif
655
656 #define TAG(x) radeon##x##_z24
657 #include "depthtmp.h"
658
659 /* 24 bit depth, 8 bit stencil depthbuffer functions
660 * EXT_depth_stencil
661 *
662 * Careful: It looks like the R300 uses ZZZS byte order while the R200
663 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
664 */
665 #define VALUE_TYPE GLuint
666
667 #if defined(RADEON_R300)
668 #define WRITE_DEPTH( _x, _y, d ) \
669 do { \
670 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
671 *_ptr = CPU_TO_LE32((((d) & 0xff000000) >> 24) | (((d) & 0x00ffffff) << 8)); \
672 } while (0)
673 #elif defined(RADEON_R600)
674 #define WRITE_DEPTH( _x, _y, d ) \
675 do { \
676 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
677 GLuint tmp = *_ptr; \
678 tmp &= 0xff000000; \
679 tmp |= ((d) & 0x00ffffff); \
680 *_ptr = tmp; \
681 _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
682 tmp = *_ptr; \
683 tmp &= 0xffffff00; \
684 tmp |= ((d) >> 24) & 0xff; \
685 *_ptr = tmp; \
686 } while (0)
687 #elif defined(RADEON_R200)
688 #define WRITE_DEPTH( _x, _y, d ) \
689 do { \
690 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
691 *_ptr = CPU_TO_LE32(d); \
692 } while (0)
693 #else
694 #define WRITE_DEPTH( _x, _y, d ) \
695 do { \
696 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
697 *_ptr = CPU_TO_LE32(d); \
698 } while (0)
699 #endif
700
701 #if defined(RADEON_R300)
702 #define READ_DEPTH( d, _x, _y ) \
703 do { \
704 GLuint tmp = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
705 d = LE32_TO_CPU(((tmp & 0x000000ff) << 24) | ((tmp & 0xffffff00) >> 8)); \
706 }while(0)
707 #elif defined(RADEON_R600)
708 #define READ_DEPTH( d, _x, _y ) \
709 do { \
710 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
711 d |= ((*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000; \
712 }while(0)
713 #elif defined(RADEON_R200)
714 #define READ_DEPTH( d, _x, _y ) \
715 do { \
716 d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
717 }while(0)
718 #else
719 #define READ_DEPTH( d, _x, _y ) do { \
720 d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
721 } while (0)
722 #endif
723
724 #define TAG(x) radeon##x##_s8_z24
725 #include "depthtmp.h"
726
727 /* ================================================================
728 * Stencil buffer
729 */
730
731 /* 24 bit depth, 8 bit stencil depthbuffer functions
732 */
733 #ifdef RADEON_R300
734 #define WRITE_STENCIL( _x, _y, d ) \
735 do { \
736 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
737 GLuint tmp = LE32_TO_CPU(*_ptr); \
738 tmp &= 0xffffff00; \
739 tmp |= (d) & 0xff; \
740 *_ptr = CPU_TO_LE32(tmp); \
741 } while (0)
742 #elif defined(RADEON_R600)
743 #define WRITE_STENCIL( _x, _y, d ) \
744 do { \
745 GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
746 GLuint tmp = *_ptr; \
747 tmp &= 0xffffff00; \
748 tmp |= (d) & 0xff; \
749 *_ptr = tmp; \
750 } while (0)
751 #elif defined(RADEON_R200)
752 #define WRITE_STENCIL( _x, _y, d ) \
753 do { \
754 GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
755 GLuint tmp = LE32_TO_CPU(*_ptr); \
756 tmp &= 0x00ffffff; \
757 tmp |= (((d) & 0xff) << 24); \
758 *_ptr = CPU_TO_LE32(tmp); \
759 } while (0)
760 #else
761 #define WRITE_STENCIL( _x, _y, d ) \
762 do { \
763 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
764 GLuint tmp = LE32_TO_CPU(*_ptr); \
765 tmp &= 0x00ffffff; \
766 tmp |= (((d) & 0xff) << 24); \
767 *_ptr = CPU_TO_LE32(tmp); \
768 } while (0)
769 #endif
770
771 #ifdef RADEON_R300
772 #define READ_STENCIL( d, _x, _y ) \
773 do { \
774 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
775 GLuint tmp = LE32_TO_CPU(*_ptr); \
776 d = tmp & 0x000000ff; \
777 } while (0)
778 #elif defined(RADEON_R600)
779 #define READ_STENCIL( d, _x, _y ) \
780 do { \
781 GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
782 GLuint tmp = *_ptr; \
783 d = tmp & 0x000000ff; \
784 } while (0)
785 #elif defined(RADEON_R200)
786 #define READ_STENCIL( d, _x, _y ) \
787 do { \
788 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
789 GLuint tmp = LE32_TO_CPU(*_ptr); \
790 d = (tmp & 0xff000000) >> 24; \
791 } while (0)
792 #else
793 #define READ_STENCIL( d, _x, _y ) \
794 do { \
795 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
796 GLuint tmp = LE32_TO_CPU(*_ptr); \
797 d = (tmp & 0xff000000) >> 24; \
798 } while (0)
799 #endif
800
801 #define TAG(x) radeon##x##_s8_z24
802 #include "stenciltmp.h"
803
804
805 static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
806 {
807 struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
808 int r;
809
810 if (rrb == NULL || !rrb->bo)
811 return;
812
813 if (flag) {
814 radeon_bo_wait(rrb->bo);
815 r = radeon_bo_map(rrb->bo, 1);
816 if (r) {
817 fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
818 __FUNCTION__, r);
819 }
820
821 radeonSetSpanFunctions(rrb);
822 } else {
823 radeon_bo_unmap(rrb->bo);
824 rb->GetRow = NULL;
825 rb->PutRow = NULL;
826 }
827 }
828
829 static void
830 radeon_map_unmap_framebuffer(GLcontext *ctx, struct gl_framebuffer *fb,
831 GLboolean map)
832 {
833 GLuint i, j;
834
835 /* color draw buffers */
836 for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
837 map_unmap_rb(fb->_ColorDrawBuffers[j], map);
838
839 map_unmap_rb(fb->_ColorReadBuffer, map);
840
841 /* check for render to textures */
842 for (i = 0; i < BUFFER_COUNT; i++) {
843 struct gl_renderbuffer_attachment *att =
844 fb->Attachment + i;
845 struct gl_texture_object *tex = att->Texture;
846 if (tex) {
847 /* Render to texture. Note that a mipmapped texture need not
848 * be complete for render to texture, so we must restrict to
849 * mapping only the attached image.
850 */
851 radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
852 ASSERT(att->Renderbuffer);
853
854 if (map)
855 radeon_teximage_map(image, GL_TRUE);
856 else
857 radeon_teximage_unmap(image);
858 }
859 }
860
861 /* depth buffer (Note wrapper!) */
862 if (fb->_DepthBuffer)
863 map_unmap_rb(fb->_DepthBuffer->Wrapped, map);
864
865 if (fb->_StencilBuffer)
866 map_unmap_rb(fb->_StencilBuffer->Wrapped, map);
867
868 radeon_check_front_buffer_rendering(ctx);
869 }
870
871 static void radeonSpanRenderStart(GLcontext * ctx)
872 {
873 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
874 int i;
875
876 radeon_firevertices(rmesa);
877
878 /* The locking and wait for idle should really only be needed in classic mode.
879 * In a future memory manager based implementation, this should become
880 * unnecessary due to the fact that mapping our buffers, textures, etc.
881 * should implicitly wait for any previous rendering commands that must
882 * be waited on. */
883 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
884 LOCK_HARDWARE(rmesa);
885 radeonWaitForIdleLocked(rmesa);
886 }
887
888 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
889 if (ctx->Texture.Unit[i]._ReallyEnabled)
890 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
891 }
892
893 radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_TRUE);
894 if (ctx->ReadBuffer != ctx->DrawBuffer)
895 radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_TRUE);
896 }
897
898 static void radeonSpanRenderFinish(GLcontext * ctx)
899 {
900 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
901 int i;
902
903 _swrast_flush(ctx);
904
905 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
906 if (ctx->Texture.Unit[i]._ReallyEnabled)
907 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
908 }
909
910 radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_FALSE);
911 if (ctx->ReadBuffer != ctx->DrawBuffer)
912 radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_FALSE);
913
914 if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
915 UNLOCK_HARDWARE(rmesa);
916 }
917 }
918
919 void radeonInitSpanFuncs(GLcontext * ctx)
920 {
921 struct swrast_device_driver *swdd =
922 _swrast_GetDeviceDriverReference(ctx);
923 swdd->SpanRenderStart = radeonSpanRenderStart;
924 swdd->SpanRenderFinish = radeonSpanRenderFinish;
925 }
926
927 /**
928 * Plug in the Get/Put routines for the given driRenderbuffer.
929 */
930 static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
931 {
932 if (rrb->base.Format == MESA_FORMAT_RGB565) {
933 radeonInitPointers_RGB565(&rrb->base);
934 } else if (rrb->base.Format == MESA_FORMAT_RGB565_REV) {
935 radeonInitPointers_RGB565_REV(&rrb->base);
936 } else if (rrb->base.Format == MESA_FORMAT_XRGB8888) {
937 radeonInitPointers_xRGB8888(&rrb->base);
938 } else if (rrb->base.Format == MESA_FORMAT_XRGB8888_REV) {
939 radeonInitPointers_BGRx8888(&rrb->base);
940 } else if (rrb->base.Format == MESA_FORMAT_ARGB8888) {
941 radeonInitPointers_ARGB8888(&rrb->base);
942 } else if (rrb->base.Format == MESA_FORMAT_ARGB8888_REV) {
943 radeonInitPointers_BGRA8888(&rrb->base);
944 } else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
945 radeonInitPointers_ARGB4444(&rrb->base);
946 } else if (rrb->base.Format == MESA_FORMAT_ARGB4444_REV) {
947 radeonInitPointers_ARGB4444_REV(&rrb->base);
948 } else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
949 radeonInitPointers_ARGB1555(&rrb->base);
950 } else if (rrb->base.Format == MESA_FORMAT_ARGB1555_REV) {
951 radeonInitPointers_ARGB1555_REV(&rrb->base);
952 } else if (rrb->base.Format == MESA_FORMAT_Z16) {
953 radeonInitDepthPointers_z16(&rrb->base);
954 } else if (rrb->base.Format == MESA_FORMAT_X8_Z24) {
955 radeonInitDepthPointers_z24(&rrb->base);
956 } else if (rrb->base.Format == MESA_FORMAT_S8_Z24) {
957 radeonInitDepthPointers_s8_z24(&rrb->base);
958 } else if (rrb->base.Format == MESA_FORMAT_S8) {
959 radeonInitStencilPointers_s8_z24(&rrb->base);
960 } else {
961 fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
962 }
963 }