radeon/r200/r300: mega-FBO commits.
[mesa.git] src/mesa/drivers/dri/radeon/radeon_span.c
/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);

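/* Return a CPU pointer to the pixel at (x, y) in the renderbuffer's BO.
 * When the buffer has a surface register set up for it (or is not tiled
 * at all) this is a plain pitch/cpp calculation; otherwise the macro- and
 * micro-tiled layouts are decoded by hand.  radeon_ptr32 and radeon_ptr16
 * are specialized for 4- and 2-byte pixels, radeon_ptr handles any cpp.
 */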
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
                             GLint x, GLint y)
{
        GLubyte *ptr = rrb->bo->ptr;
        uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
        GLint offset;
        GLint nmacroblkpl;
        GLint nmicroblkpl;

        if (rrb->has_surface || !(rrb->bo->flags & mask)) {
                offset = x * rrb->cpp + y * rrb->pitch;
        } else {
                offset = 0;
                if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
                        if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                                nmacroblkpl = rrb->pitch >> 5;
                                offset += ((y >> 4) * nmacroblkpl) << 11;
                                offset += ((y & 15) >> 1) << 8;
                                offset += (y & 1) << 4;
                                offset += (x >> 5) << 11;
                                offset += ((x & 31) >> 2) << 5;
                                offset += (x & 3) << 2;
                        } else {
                                nmacroblkpl = rrb->pitch >> 6;
                                offset += ((y >> 3) * nmacroblkpl) << 11;
                                offset += (y & 7) << 8;
                                offset += (x >> 6) << 11;
                                offset += ((x & 63) >> 3) << 5;
                                offset += (x & 7) << 2;
                        }
                } else {
                        nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
                        offset += (y * nmicroblkpl) << 5;
                        offset += (x >> 3) << 5;
                        offset += (x & 7) << 2;
                }
        }
        return &ptr[offset];
}

static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
                             GLint x, GLint y)
{
        GLubyte *ptr = rrb->bo->ptr;
        uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
        GLint offset;
        GLint nmacroblkpl;
        GLint nmicroblkpl;

        if (rrb->has_surface || !(rrb->bo->flags & mask)) {
                offset = x * rrb->cpp + y * rrb->pitch;
        } else {
                offset = 0;
                if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
                        if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                                nmacroblkpl = rrb->pitch >> 6;
                                offset += ((y >> 4) * nmacroblkpl) << 11;
                                offset += ((y & 15) >> 1) << 8;
                                offset += (y & 1) << 4;
                                offset += (x >> 6) << 11;
                                offset += ((x & 63) >> 3) << 5;
                                offset += (x & 7) << 1;
                        } else {
                                nmacroblkpl = rrb->pitch >> 7;
                                offset += ((y >> 3) * nmacroblkpl) << 11;
                                offset += (y & 7) << 8;
                                offset += (x >> 7) << 11;
                                offset += ((x & 127) >> 4) << 5;
                                offset += (x & 15) << 2;
                        }
                } else {
                        nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
                        offset += (y * nmicroblkpl) << 5;
                        offset += (x >> 4) << 5;
                        offset += (x & 15) << 2;
                }
        }
        return &ptr[offset];
}

static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
                           GLint x, GLint y)
{
        GLubyte *ptr = rrb->bo->ptr;
        uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
        GLint offset;
        GLint microblkxs;
        GLint macroblkxs;
        GLint nmacroblkpl;
        GLint nmicroblkpl;

        if (rrb->has_surface || !(rrb->bo->flags & mask)) {
                offset = x * rrb->cpp + y * rrb->pitch;
        } else {
                offset = 0;
                if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
                        if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                                microblkxs = 16 / rrb->cpp;
                                macroblkxs = 128 / rrb->cpp;
                                nmacroblkpl = rrb->pitch / macroblkxs;
                                offset += ((y >> 4) * nmacroblkpl) << 11;
                                offset += ((y & 15) >> 1) << 8;
                                offset += (y & 1) << 4;
                                offset += (x / macroblkxs) << 11;
                                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                                offset += (x & (microblkxs - 1)) * rrb->cpp;
                        } else {
                                microblkxs = 32 / rrb->cpp;
                                macroblkxs = 256 / rrb->cpp;
                                nmacroblkpl = rrb->pitch / macroblkxs;
                                offset += ((y >> 3) * nmacroblkpl) << 11;
                                offset += (y & 7) << 8;
                                offset += (x / macroblkxs) << 11;
                                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                                offset += (x & (microblkxs - 1)) * rrb->cpp;
                        }
                } else {
                        microblkxs = 32 / rrb->cpp;
                        nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
                        offset += (y * nmicroblkpl) << 5;
                        offset += (x / microblkxs) << 5;
                        offset += (x & (microblkxs - 1)) * rrb->cpp;
                }
        }
        return &ptr[offset];
}

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
#define LOCAL_VARS \
   struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
   struct radeon_renderbuffer *rrb = (void *) rb; \
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
   unsigned int num_cliprects; \
   struct drm_clip_rect *cliprects; \
   int x_off, y_off; \
   GLuint p; \
   (void)p; \
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_DEPTH_VARS \
   struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
   struct radeon_renderbuffer *rrb = (void *) rb; \
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
   unsigned int num_cliprects; \
   struct drm_clip_rect *cliprects; \
   int x_off, y_off; \
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

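/* The window-system framebuffer (DrawBuffer->Name == 0) is stored with
 * y = 0 at the top, so spans have to be flipped; user FBOs are rendered
 * in GL orientation and are left alone.  That is what the yScale/yBias
 * pair set up above encodes.
 */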
#define Y_FLIP(_y) ((_y) * yScale + yBias)

#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 */
#define HW_CLIPLOOP() \
   do { \
      int _nc = num_cliprects; \
      while ( _nc-- ) { \
         int minx = cliprects[_nc].x1 - x_off; \
         int miny = cliprects[_nc].y1 - y_off; \
         int maxx = cliprects[_nc].x2 - x_off; \
         int maxy = cliprects[_nc].y2 - y_off;

/* ================================================================
 * Color buffer
 */

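/* Each block below defines the pixel format, the TAG used to name the
 * generated functions and a GET_PTR that resolves (x, y) to an address,
 * then includes spantmp2.h.  That template expands into the per-format
 * span read/write functions and the radeonInitPointers_<fmt>() helper
 * used by radeonSetSpanFunctions() at the bottom of this file.
 */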
/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* 32 bit, xRGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x) radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine.  In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly.  It is not quite enough to get it working with hyperz
 * too...
 */

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d ) \
   *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d

#define READ_DEPTH( d, _x, _y ) \
   d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
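/* In other words (following the masks in the macros below): on R300 the
 * depth value occupies bits 31..8 of the dword and the stencil byte sits
 * in bits 7..0, while on the other chips the stencil byte lives in bits
 * 31..24 above a 24-bit depth value in bits 23..0.
 */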
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d ) \
do { \
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
   GLuint tmp = *_ptr; \
   tmp &= 0x000000ff; \
   tmp |= ((d << 8) & 0xffffff00); \
   *_ptr = tmp; \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
   GLuint tmp = *_ptr; \
   tmp &= 0xff000000; \
   tmp |= ((d) & 0x00ffffff); \
   *_ptr = tmp; \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y ) \
do { \
   d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
} while (0)
#else
#define READ_DEPTH( d, _x, _y ) \
   d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off )) & 0x00ffffff;
#endif
/*
   fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
   d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \
   GLuint tmp = *_ptr; \
   tmp &= 0xffffff00; \
   tmp |= (d) & 0xff; \
   *_ptr = tmp; \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \
   GLuint tmp = *_ptr; \
   tmp &= 0x00ffffff; \
   tmp |= (((d) & 0xff) << 24); \
   *_ptr = tmp; \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
   GLuint tmp = *_ptr; \
   d = tmp & 0x000000ff; \
} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
   GLuint tmp = *_ptr; \
   d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"

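/* Map (flag != 0) or unmap (flag == 0) a renderbuffer's BO for CPU access
 * and install or clear its span functions accordingly.
 */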
void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
{
        struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
        int r;

        if (rrb == NULL || !rrb->bo)
                return;

        if (flag) {
                r = radeon_bo_map(rrb->bo, 1);
                if (r) {
                        fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
                                __FUNCTION__, r);
                }

                radeonSetSpanFunctions(rrb);
        } else {
                radeon_bo_unmap(rrb->bo);
                rb->GetRow = NULL;
                rb->PutRow = NULL;
        }
}

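/* Map or unmap everything software rendering may touch for the current
 * draw/read framebuffers: the color draw buffers, any textures being
 * rendered to, the color read buffer and the wrapped depth/stencil
 * buffers.
 */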
static void
radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
{
        radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
        GLuint i, j;

        /* color draw buffers */
        for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
                map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);

        /* check for render to textures */
        for (i = 0; i < BUFFER_COUNT; i++) {
                struct gl_renderbuffer_attachment *att =
                        ctx->DrawBuffer->Attachment + i;
                struct gl_texture_object *tex = att->Texture;
                if (tex) {
                        /* render to texture */
                        ASSERT(att->Renderbuffer);
                        if (map)
                                ctx->Driver.MapTexture(ctx, tex);
                        else
                                ctx->Driver.UnmapTexture(ctx, tex);
                }
        }

        map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);

        /* depth buffer (Note wrapper!) */
        if (ctx->DrawBuffer->_DepthBuffer)
                map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);

        if (ctx->DrawBuffer->_StencilBuffer)
                map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
}
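
/* Called by swrast before it renders any spans: flush queued hardware
 * rendering, map the textures and renderbuffers that will be accessed,
 * then grab the hardware lock and wait for the chip to go idle so the
 * CPU does not touch buffers the GPU is still using.
 */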
static void radeonSpanRenderStart(GLcontext * ctx)
{
        radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
        int i;

        radeon_firevertices(rmesa);

        for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
                if (ctx->Texture.Unit[i]._ReallyEnabled)
                        ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
        }

        radeon_map_unmap_buffers(ctx, 1);

        /* The locking and wait for idle should really only be needed in classic mode.
         * In a future memory manager based implementation, this should become
         * unnecessary due to the fact that mapping our buffers, textures, etc.
         * should implicitly wait for any previous rendering commands that must
         * be waited on. */
        LOCK_HARDWARE(rmesa);
        radeonWaitForIdleLocked(rmesa);
}

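/* Called by swrast when it is done: flush software rendering, drop the
 * hardware lock and unmap everything radeonSpanRenderStart() mapped.
 */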
static void radeonSpanRenderFinish(GLcontext * ctx)
{
        radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
        int i;

        _swrast_flush(ctx);
        UNLOCK_HARDWARE(rmesa);

        for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
                if (ctx->Texture.Unit[i]._ReallyEnabled)
                        ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
        }

        radeon_map_unmap_buffers(ctx, 0);
}

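/* Hook the span-render start/finish callbacks into swrast so the buffers
 * above get mapped and unmapped around software fallbacks.
 */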
void radeonInitSpanFuncs(GLcontext * ctx)
{
        struct swrast_device_driver *swdd =
                _swrast_GetDeviceDriverReference(ctx);
        swdd->SpanRenderStart = radeonSpanRenderStart;
        swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put span routines for the given renderbuffer,
 * based on its actual format.
 */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
        if (rrb->base._ActualFormat == GL_RGB5) {
                radeonInitPointers_RGB565(&rrb->base);
        } else if (rrb->base._ActualFormat == GL_RGB8) {
                radeonInitPointers_xRGB8888(&rrb->base);
        } else if (rrb->base._ActualFormat == GL_RGBA8) {
                radeonInitPointers_ARGB8888(&rrb->base);
        } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
                radeonInitDepthPointers_z16(&rrb->base);
        } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
                radeonInitDepthPointers_z24_s8(&rrb->base);
        } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
                radeonInitStencilPointers_z24_s8(&rrb->base);
        } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
                radeonInitStencilPointers_z24_s8(&rrb->base);
        }
}