radeon: add cpp/pitch to rrb
src/mesa/drivers/dri/radeon/radeon_span.c (mesa.git)
/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);

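/* Return a CPU pointer to the pixel at (x, y) in a 32 bpp renderbuffer.
 * For linear buffers, or buffers backed by a surface register (where the
 * chip does the address translation itself), the offset is simply
 * x * cpp + y * pitch.  For macro/micro tiled buffers the offset is
 * assembled from the tile indices and the position within the tile,
 * mirroring the hardware's tiled address layout.
 */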
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
                             GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                nmacroblkpl = rrb->pitch >> 5;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x >> 5) << 11;
                offset += ((x & 31) >> 2) << 5;
                offset += (x & 3) << 2;
            } else {
                nmacroblkpl = rrb->pitch >> 6;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x >> 6) << 11;
                offset += ((x & 63) >> 3) << 5;
                offset += (x & 7) << 2;
            }
        } else {
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x >> 3) << 5;
            offset += (x & 7) << 2;
        }
    }
    return &ptr[offset];
}

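/* Same as radeon_ptr32(), but for 16 bpp renderbuffers; the shift constants
 * differ because each pixel is two bytes rather than four.
 */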
static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
                             GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                nmacroblkpl = rrb->pitch >> 6;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x >> 6) << 11;
                offset += ((x & 63) >> 3) << 5;
                offset += (x & 7) << 1;
            } else {
                nmacroblkpl = rrb->pitch >> 7;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x >> 7) << 11;
                offset += ((x & 127) >> 4) << 5;
                offset += (x & 15) << 2;
            }
        } else {
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x >> 4) << 5;
            offset += (x & 15) << 2;
        }
    }
    return &ptr[offset];
}

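/* Generic variant of the helpers above: works for any rrb->cpp by computing
 * the tile dimensions in pixels from the byte sizes at run time.
 */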
static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
                           GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint microblkxs;
    GLint macroblkxs;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                microblkxs = 16 / rrb->cpp;
                macroblkxs = 128 / rrb->cpp;
                nmacroblkpl = rrb->pitch / macroblkxs;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x / macroblkxs) << 11;
                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                offset += (x & (microblkxs - 1)) * rrb->cpp;
            } else {
                microblkxs = 32 / rrb->cpp;
                macroblkxs = 256 / rrb->cpp;
                nmacroblkpl = rrb->pitch / macroblkxs;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x / macroblkxs) << 11;
                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                offset += (x & (microblkxs - 1)) * rrb->cpp;
            }
        } else {
            microblkxs = 32 / rrb->cpp;
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x / microblkxs) << 5;
            offset += (x & (microblkxs - 1)) * rrb->cpp;
        }
    }
    return &ptr[offset];
}


/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
#define LOCAL_VARS \
    struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
    struct radeon_renderbuffer *rrb = (void *) rb; \
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
    unsigned int num_cliprects; \
    struct drm_clip_rect *cliprects; \
    int x_off, y_off; \
    GLuint p; \
    (void)p; \
    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_DEPTH_VARS \
    struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
    struct radeon_renderbuffer *rrb = (void *) rb; \
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
    unsigned int num_cliprects; \
    struct drm_clip_rect *cliprects; \
    int x_off, y_off; \
    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

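/* Window-system framebuffers (DrawBuffer->Name == 0) are laid out with the
 * origin at the top, so spans have to be flipped vertically; user FBOs
 * (Name != 0) are already in OpenGL's bottom-up orientation and pass
 * through unchanged (yScale = 1, yBias = 0).
 */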
#define Y_FLIP(_y) ((_y) * yScale + yBias)

#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 */
#define HW_CLIPLOOP() \
    do { \
        int _nc = num_cliprects; \
        while ( _nc-- ) { \
            int minx = cliprects[_nc].x1 - x_off; \
            int miny = cliprects[_nc].y1 - y_off; \
            int maxx = cliprects[_nc].x2 - x_off; \
            int maxy = cliprects[_nc].y2 - y_off;

/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x) radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x) radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine. In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly. It is not quite enough to get it working with hyperz
 * too...
 */

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d

#define READ_DEPTH( d, _x, _y ) \
    d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
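/* In other words, on R300 the 32-bit word is packed as (depth << 8) | stencil,
 * while on the older parts it is (stencil << 24) | depth; the differing masks
 * in the WRITE_DEPTH/READ_DEPTH and WRITE_STENCIL/READ_STENCIL macros below
 * follow from that.
 */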
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0x000000ff; \
    tmp |= ((d << 8) & 0xffffff00); \
    *_ptr = tmp; \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = tmp; \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
} while (0)
#else
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off )) & 0x00ffffff;
#endif
/*
    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
    d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = *_ptr; \
    tmp &= 0xffffff00; \
    tmp |= (d) & 0xff; \
    *_ptr = tmp; \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = *_ptr; \
    tmp &= 0x00ffffff; \
    tmp |= (((d) & 0xff) << 24); \
    *_ptr = tmp; \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    d = tmp & 0x000000ff; \
} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = *_ptr; \
    d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"


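/* Map the renderbuffer's BO into the CPU address space and (re)install the
 * span functions for its format, since swrast accesses the pixels directly
 * through the pointers set up here.
 */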
static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
{
    struct radeon_renderbuffer *rrb = (void*)rb;
    int r;

    if (rrb->bo) {
        r = radeon_bo_map(rrb->bo, write);
        if (r) {
            fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
                    __FUNCTION__, r);
        }
    }

    radeonSetSpanFunctions(rrb);
}

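/* Unmap the BO and clear the Get/PutRow hooks so stale pointers are not
 * used once the mapping goes away.
 */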
static void unmap_buffer(struct gl_renderbuffer *rb)
{
    struct radeon_renderbuffer *rrb = (void*)rb;

    if (rrb->bo) {
        radeon_bo_unmap(rrb->bo);
    }
    rb->GetRow = NULL;
    rb->PutRow = NULL;
}

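/* Prepare for software (swrast) rendering: flush pending hardware vertices,
 * then map every texture and renderbuffer the span functions may touch.
 */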
static void radeonSpanRenderStart(GLcontext * ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    radeon_firevertices(rmesa);

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    /* color draw buffers */
    for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
        map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
    }

    map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);

    if (ctx->DrawBuffer->_DepthBuffer) {
        map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
    }
    if (ctx->DrawBuffer->_StencilBuffer)
        map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);

    /* The locking and wait for idle should really only be needed in classic mode.
     * In a future memory manager based implementation, this should become
     * unnecessary due to the fact that mapping our buffers, textures, etc.
     * should implicitly wait for any previous rendering commands that must
     * be waited on. */
    LOCK_HARDWARE(rmesa);
    radeonWaitForIdleLocked(rmesa);
}

static void radeonSpanRenderFinish(GLcontext * ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    _swrast_flush(ctx);
    UNLOCK_HARDWARE(rmesa);

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    /* color draw buffers */
    for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
        unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);

    unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);

    if (ctx->DrawBuffer->_DepthBuffer)
        unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
    if (ctx->DrawBuffer->_StencilBuffer)
        unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
}

void radeonInitSpanFuncs(GLcontext * ctx)
{
    struct swrast_device_driver *swdd =
        _swrast_GetDeviceDriverReference(ctx);
    swdd->SpanRenderStart = radeonSpanRenderStart;
    swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put span routines for the given renderbuffer,
 * according to its actual hardware format.
 */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
    if (rrb->base._ActualFormat == GL_RGB5) {
        radeonInitPointers_RGB565(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_RGB8) {
        radeonInitPointers_ARGB8888(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_RGBA8) {
        radeonInitPointers_ARGB8888(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
        radeonInitDepthPointers_z16(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
        radeonInitDepthPointers_z24_s8(&rrb->base);
    } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
        radeonInitStencilPointers_z24_s8(&rrb->base);
    }
}