30dde8099485062f55e9aa3dc5ca30f572ef57d9
[mesa.git] / src / mesa / drivers / dri / r300 / radeon_span.c
1 /**************************************************************************
2
3 Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
4 Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5 VA Linux Systems Inc., Fremont, California.
6
7 The Weather Channel (TM) funded Tungsten Graphics to develop the
8 initial release of the Radeon 8500 driver under the XFree86 license.
9 This notice must be preserved.
10
11 All Rights Reserved.
12
13 Permission is hereby granted, free of charge, to any person obtaining
14 a copy of this software and associated documentation files (the
15 "Software"), to deal in the Software without restriction, including
16 without limitation the rights to use, copy, modify, merge, publish,
17 distribute, sublicense, and/or sell copies of the Software, and to
18 permit persons to whom the Software is furnished to do so, subject to
19 the following conditions:
20
21 The above copyright notice and this permission notice (including the
22 next paragraph) shall be included in all copies or substantial
23 portions of the Software.
24
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33 **************************************************************************/
34
35 /*
36 * Authors:
37 * Kevin E. Martin <martin@valinux.com>
38 * Gareth Hughes <gareth@valinux.com>
39 * Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43 #include "main/glheader.h"
44 #include "swrast/swrast.h"
45
46 #include "r300_state.h"
47 #include "radeon_ioctl.h"
48 #include "r300_ioctl.h"
49 #include "radeon_span.h"
50
51 #include "radeon_buffer.h"
52
#define DBG 0

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
/* Per-span locals required by the color templates in spantmp2.h: the
 * renderbuffer, its drawable, and the bottom row used by Y_FLIP.
 * 'p' is part of the template interface but unused by this driver. */
#define LOCAL_VARS						\
   struct radeon_renderbuffer *rrb = (void *) rb;		\
   const __DRIdrawablePrivate *dPriv = rrb->dPriv;		\
   const GLuint bottom = dPriv->h - 1;				\
   GLuint p;							\
   (void)p;

/* Locals for depthtmp.h / stenciltmp.h.  xo/yo are the drawable's
 * origin; depth/stencil accesses below add them to every coordinate —
 * NOTE(review): presumably because the depth buffer is addressed in
 * screen coordinates in classic (non-MM) mode; confirm. */
#define LOCAL_DEPTH_VARS				\
   struct radeon_renderbuffer *rrb = (void *) rb;	\
   const __DRIdrawablePrivate *dPriv = rrb->dPriv;	\
   const GLuint bottom = dPriv->h - 1;			\
   GLuint xo = dPriv->x;				\
   GLuint yo = dPriv->y;

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Convert GL's bottom-up Y to the framebuffer's top-down Y. */
#define Y_FLIP(Y) (bottom - (Y))

/* Locking is hoisted into radeonSpanRenderStart/Finish for performance
 * (see comment near those functions), so the per-span hooks are empty. */
#define HW_LOCK()

#define HW_UNLOCK()
81
/* Return a CPU pointer to the 32bpp pixel at (x, y) of the renderbuffer.
 *
 * When the BO is linear, or a surface register translates tiled
 * addresses transparently (rrb->has_surface), this is plain linear
 * addressing.  Otherwise the macro-tile (2K, 1 << 11 bytes) and
 * micro-tile (32 bytes = 8 pixels at 4 bpp, 1 << 5) twiddling is done
 * by hand.
 * NOTE(review): the bit layout below is hardware-specific and assumed
 * to match the R300 tiling scheme — confirm against the register spec
 * before modifying.
 */
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;	/* CPU mapping of the backing BO */
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;	/* macro tiles per line */
	GLint nmicroblkpl;	/* micro tiles per line */

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		/* Linear case: pitch is in bytes. */
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				/* Macro + micro tiled: 16-row macro tiles,
				 * row pairs interleaved inside a tile. */
				nmacroblkpl = rrb->pitch >> 5;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 5) << 11;
				offset += ((x & 31) >> 2) << 5;
				offset += (x & 3) << 2;	/* 4 bytes per pixel */
			} else {
				/* Macro tiled only: 8-row, 64-pixel tiles. */
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 2;	/* 4 bytes per pixel */
			}
		} else {
			/* Micro tiled only: pitch rounded up to a whole
			 * number of 32-byte micro tiles. */
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 3) << 5;
			offset += (x & 7) << 2;
		}
	}
	return &ptr[offset];
}
121
122 static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
123 GLint x, GLint y)
124 {
125 GLubyte *ptr = rrb->bo->ptr;
126 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
127 GLint offset;
128 GLint nmacroblkpl;
129 GLint nmicroblkpl;
130
131 if (rrb->has_surface || !(rrb->bo->flags & mask)) {
132 offset = x * rrb->cpp + y * rrb->pitch;
133 } else {
134 offset = 0;
135 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
136 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
137 nmacroblkpl = rrb->pitch >> 6;
138 offset += ((y >> 4) * nmacroblkpl) << 11;
139 offset += ((y & 15) >> 1) << 8;
140 offset += (y & 1) << 4;
141 offset += (x >> 6) << 11;
142 offset += ((x & 63) >> 3) << 5;
143 offset += (x & 7) << 1;
144 } else {
145 nmacroblkpl = rrb->pitch >> 7;
146 offset += ((y >> 3) * nmacroblkpl) << 11;
147 offset += (y & 7) << 8;
148 offset += (x >> 7) << 11;
149 offset += ((x & 127) >> 4) << 5;
150 offset += (x & 15) << 2;
151 }
152 } else {
153 nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
154 offset += (y * nmicroblkpl) << 5;
155 offset += (x >> 4) << 5;
156 offset += (x & 15) << 2;
157 }
158 }
159 return &ptr[offset];
160 }
161
/* Return a CPU pointer to the pixel at (x, y) for an arbitrary cpp —
 * the generic counterpart of radeon_ptr32/radeon_ptr16.  Tile widths are
 * expressed in pixels (micro tile = 16 or 32 bytes / cpp, macro tile =
 * 128 or 256 bytes / cpp) and the offset math uses divisions instead of
 * the fixed shifts of the specialized variants.
 * NOTE(review): hardware-specific layout, assumed to match R300 tiling —
 * confirm against the register spec before modifying.
 */
static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
			   GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;	/* CPU mapping of the backing BO */
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint microblkxs;	/* micro-tile width in pixels */
	GLint macroblkxs;	/* macro-tile width in pixels */
	GLint nmacroblkpl;	/* macro tiles per line */
	GLint nmicroblkpl;	/* micro tiles per line */

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		/* Linear case: pitch is in bytes. */
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				/* Macro + micro tiled: 16-byte-wide micro
				 * tiles inside 128-byte-wide macro tiles. */
				microblkxs = 16 / rrb->cpp;
				macroblkxs = 128 / rrb->cpp;
				nmacroblkpl = rrb->pitch / macroblkxs;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x / macroblkxs) << 11;
				offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
				offset += (x & (microblkxs - 1)) * rrb->cpp;
			} else {
				/* Macro tiled only: 32-byte-wide micro tiles
				 * inside 256-byte-wide macro tiles. */
				microblkxs = 32 / rrb->cpp;
				macroblkxs = 256 / rrb->cpp;
				nmacroblkpl = rrb->pitch / macroblkxs;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x / macroblkxs) << 11;
				offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
				offset += (x & (microblkxs - 1)) * rrb->cpp;
			}
		} else {
			/* Micro tiled only: pitch rounded up to a whole
			 * number of 32-byte micro tiles. */
			microblkxs = 32 / rrb->cpp;
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x / microblkxs) << 5;
			offset += (x & (microblkxs - 1)) * rrb->cpp;
		}
	}
	return &ptr[offset];
}
208
/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

/* Instantiate the radeon*_RGB565 read/write span functions from the
 * generic template; GET_PTR supplies the (possibly tiled) addressing. */
#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

/* Instantiate the radeon*_ARGB8888 read/write span functions. */
#define TAG(x)    radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
#include "spantmp2.h"
232
/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine. In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly. It is not quite enough to get it working with hyperz
 * too...
 */

/* 16-bit depth buffer functions
 *
 * xo/yo come from LOCAL_DEPTH_VARS (the drawable's origin) — depth is
 * addressed in screen coordinates here.
 */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d )					\
	*(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo) = d

#define READ_DEPTH( d, _x, _y )						\
	d = *(GLushort *)radeon_ptr(rrb, _x + xo, _y + yo)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
/* R300 (ZZZS): depth in bits 31..8; preserve the stencil byte. */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );	\
	GLuint tmp = *_ptr;						\
	tmp &= 0x000000ff;						\
	tmp |= ((d << 8) & 0xffffff00);					\
	*_ptr = tmp;							\
} while (0)
#else
/* R200 and earlier (SZZZ): depth in bits 23..0; preserve stencil. */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );	\
	GLuint tmp = *_ptr;						\
	tmp &= 0xff000000;						\
	tmp |= ((d) & 0x00ffffff);					\
	*_ptr = tmp;							\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y )						\
do {									\
	d = (*(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo)) & 0xffffff00) >> 8; \
}while(0)
#else
#define READ_DEPTH( d, _x, _y )						\
	d = *(GLuint*)(radeon_ptr32(rrb, _x + xo, _y + yo )) & 0x00ffffff;
#endif
/* Leftover debug snippet, kept for reference only:
fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
d = *(GLuint*)(radeon_ptr(rrb, _x + xo, _y + yo )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"
302
/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
/* R300 (ZZZS): stencil lives in the low byte; preserve the depth bits. */
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);	\
	GLuint tmp = *_ptr;						\
	tmp &= 0xffffff00;						\
	tmp |= (d) & 0xff;						\
	*_ptr = tmp;							\
} while (0)
#else
/* R200 and earlier (SZZZ): stencil lives in the top byte. */
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + xo, _y + yo);	\
	GLuint tmp = *_ptr;						\
	tmp &= 0x00ffffff;						\
	tmp |= (((d) & 0xff) << 24);					\
	*_ptr = tmp;							\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );	\
	GLuint tmp = *_ptr;						\
	d = tmp & 0x000000ff;						\
} while (0)
#else
#define READ_STENCIL( d, _x, _y )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + xo, _y + yo );	\
	GLuint tmp = *_ptr;						\
	d = (tmp & 0xff000000) >> 24;					\
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"
347
348 static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
349 {
350 struct radeon_renderbuffer *rrb = (void*)rb;
351 int r;
352
353 if (rrb->bo) {
354 r = radeon_bo_map(rrb->bo, write);
355 if (r) {
356 fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
357 __FUNCTION__, r);
358 }
359 }
360 }
361
362 static void unmap_buffer(struct gl_renderbuffer *rb)
363 {
364 struct radeon_renderbuffer *rrb = (void*)rb;
365
366 if (rrb->bo) {
367 radeon_bo_unmap(rrb->bo);
368 }
369 }
370
371 /* Move locking out to get reasonable span performance (10x better
372 * than doing this in HW_LOCK above). WaitForIdle() is the main
373 * culprit.
374 */
375
376 static void radeonSpanRenderStart(GLcontext * ctx)
377 {
378 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
379 int i;
380 #ifdef COMPILE_R300
381 r300ContextPtr r300 = (r300ContextPtr) rmesa;
382 R300_FIREVERTICES(r300);
383 #else
384 RADEON_FIREVERTICES(rmesa);
385 #endif
386
387 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
388 if (ctx->Texture.Unit[i]._ReallyEnabled)
389 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
390 }
391
392 /* color draw buffers */
393 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
394 map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
395 }
396
397 map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);
398
399 if (ctx->DrawBuffer->_DepthBuffer) {
400 map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
401 }
402 if (ctx->DrawBuffer->_StencilBuffer)
403 map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);
404
405 /* The locking and wait for idle should really only be needed in classic mode.
406 * In a future memory manager based implementation, this should become
407 * unnecessary due to the fact that mapping our buffers, textures, etc.
408 * should implicitly wait for any previous rendering commands that must
409 * be waited on. */
410 LOCK_HARDWARE(rmesa);
411 radeonWaitForIdleLocked(rmesa);
412 }
413
414 static void radeonSpanRenderFinish(GLcontext * ctx)
415 {
416 radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
417 int i;
418 _swrast_flush(ctx);
419 UNLOCK_HARDWARE(rmesa);
420
421 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
422 if (ctx->Texture.Unit[i]._ReallyEnabled)
423 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
424 }
425
426 /* color draw buffers */
427 for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
428 unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);
429
430 unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);
431
432 if (ctx->DrawBuffer->_DepthBuffer)
433 unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
434 if (ctx->DrawBuffer->_StencilBuffer)
435 unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
436 }
437
438 void radeonInitSpanFuncs(GLcontext * ctx)
439 {
440 struct swrast_device_driver *swdd =
441 _swrast_GetDeviceDriverReference(ctx);
442 swdd->SpanRenderStart = radeonSpanRenderStart;
443 swdd->SpanRenderFinish = radeonSpanRenderFinish;
444 }
445
446 /**
447 * Plug in the Get/Put routines for the given driRenderbuffer.
448 */
449 void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
450 {
451 if (rrb->base.InternalFormat == GL_RGB5) {
452 radeonInitPointers_RGB565(&rrb->base);
453 } else if (rrb->base.InternalFormat == GL_RGBA8) {
454 radeonInitPointers_ARGB8888(&rrb->base);
455 } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
456 radeonInitDepthPointers_z16(&rrb->base);
457 } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
458 radeonInitDepthPointers_z24_s8(&rrb->base);
459 } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
460 radeonInitStencilPointers_z24_s8(&rrb->base);
461 }
462 }