Merge remote branch 'main/master' into radeon-rewrite
[mesa.git] / src / mesa / drivers / dri / radeon / radeon_span.c
/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

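/* Return a CPU pointer to pixel (x, y) of a 32 bpp renderbuffer.  When the
 * buffer object is macro- and/or micro-tiled, the offset is computed with
 * the same tile address swizzling the hardware uses; otherwise it is a
 * plain y * pitch + x * cpp offset.
 */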
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer *rrb,
                             GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    x += dPriv->x;
    y += dPriv->y;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                nmacroblkpl = rrb->pitch >> 5;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x >> 5) << 11;
                offset += ((x & 31) >> 2) << 5;
                offset += (x & 3) << 2;
            } else {
                nmacroblkpl = rrb->pitch >> 6;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x >> 6) << 11;
                offset += ((x & 63) >> 3) << 5;
                offset += (x & 7) << 2;
            }
        } else {
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x >> 3) << 5;
            offset += (x & 7) << 2;
        }
    }
    return &ptr[offset];
}

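/* As radeon_ptr32(), but with the tile address math for 16 bpp buffers
 * (RGB565 color, 16-bit depth).
 */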
static GLubyte *radeon_ptr16(const struct radeon_renderbuffer *rrb,
                             GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    x += dPriv->x;
    y += dPriv->y;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                nmacroblkpl = rrb->pitch >> 6;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x >> 6) << 11;
                offset += ((x & 63) >> 3) << 5;
                offset += (x & 7) << 1;
            } else {
                nmacroblkpl = rrb->pitch >> 7;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x >> 7) << 11;
                offset += ((x & 127) >> 4) << 5;
                offset += (x & 15) << 2;
            }
        } else {
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x >> 4) << 5;
            offset += (x & 15) << 2;
        }
    }
    return &ptr[offset];
}

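/* Generic variant of the helpers above: the micro/macro tile block widths
 * are derived from rrb->cpp instead of being hard-coded for one pixel size.
 */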
static GLubyte *radeon_ptr(const struct radeon_renderbuffer *rrb,
                           GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    const __DRIdrawablePrivate *dPriv = rrb->dPriv;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint microblkxs;
    GLint macroblkxs;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    x += dPriv->x;
    y += dPriv->y;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                microblkxs = 16 / rrb->cpp;
                macroblkxs = 128 / rrb->cpp;
                nmacroblkpl = rrb->pitch / macroblkxs;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x / macroblkxs) << 11;
                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                offset += (x & (microblkxs - 1)) * rrb->cpp;
            } else {
                microblkxs = 32 / rrb->cpp;
                macroblkxs = 256 / rrb->cpp;
                nmacroblkpl = rrb->pitch / macroblkxs;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x / macroblkxs) << 11;
                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                offset += (x & (microblkxs - 1)) * rrb->cpp;
            }
        } else {
            microblkxs = 32 / rrb->cpp;
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x / microblkxs) << 5;
            offset += (x & (microblkxs - 1)) * rrb->cpp;
        }
    }
    return &ptr[offset];
}

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
#define LOCAL_VARS                                              \
    struct radeon_renderbuffer *rrb = (void *) rb;              \
    const __DRIdrawablePrivate *dPriv = rrb->dPriv;             \
    const GLuint bottom = dPriv->h - 1;                         \
    GLuint p;                                                   \
    (void)p;

#define LOCAL_DEPTH_VARS                                        \
    struct radeon_renderbuffer *rrb = (void *) rb;              \
    const __DRIdrawablePrivate *dPriv = rrb->dPriv;             \
    const GLuint bottom = dPriv->h - 1;

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

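/* OpenGL puts the origin at the bottom-left of the drawable while the
 * hardware addresses pixels from the top-left, so span y coordinates are
 * flipped.  HW_LOCK()/HW_UNLOCK() are intentionally empty: the hardware
 * lock is taken once per software-rendering pass in radeonSpanRenderStart()
 * and released in radeonSpanRenderFinish() below.
 */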
#define Y_FLIP(Y) (bottom - (Y))

#define HW_LOCK()

#define HW_UNLOCK()

/* ================================================================
 * Color buffer
 */

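/* Each block below defines the pixel format, the TAG used to name the
 * generated functions and the GET_PTR address helper, then includes
 * spantmp2.h, which expands into the span/pixel read and write functions
 * for that format.
 */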
/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X), (Y))
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X), (Y))
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same manner
 * as the engine.  In each case, the linear block address (ba) is calculated
 * first, and bits of x and y are then folded in to produce the final memory
 * address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly.  That alone is not quite enough to get it working
 * with hyperz too...
 */

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d )                                \
    *(GLushort *)radeon_ptr(rrb, _x, _y) = d

#define READ_DEPTH( d, _x, _y )                                 \
    d = *(GLushort *)radeon_ptr(rrb, _x, _y)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d )                                \
do {                                                            \
    GLuint *_ptr = (GLuint *)radeon_ptr32(rrb, _x, _y);         \
    GLuint tmp = *_ptr;                                         \
    tmp &= 0x000000ff;                                          \
    tmp |= ((d << 8) & 0xffffff00);                             \
    *_ptr = tmp;                                                \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d )                                \
do {                                                            \
    GLuint *_ptr = (GLuint *)radeon_ptr32(rrb, _x, _y);         \
    GLuint tmp = *_ptr;                                         \
    tmp &= 0xff000000;                                          \
    tmp |= ((d) & 0x00ffffff);                                  \
    *_ptr = tmp;                                                \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y )                                 \
do {                                                            \
    d = (*(GLuint *)(radeon_ptr32(rrb, _x, _y)) & 0xffffff00) >> 8; \
} while (0)
#else
#define READ_DEPTH( d, _x, _y )                                 \
    d = *(GLuint *)(radeon_ptr32(rrb, _x, _y)) & 0x00ffffff;
#endif
/*
    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);
    d = *(GLuint *)(radeon_ptr(rrb, _x, _y)) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d )                              \
do {                                                            \
    GLuint *_ptr = (GLuint *)radeon_ptr32(rrb, _x, _y);         \
    GLuint tmp = *_ptr;                                         \
    tmp &= 0xffffff00;                                          \
    tmp |= (d) & 0xff;                                          \
    *_ptr = tmp;                                                \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d )                              \
do {                                                            \
    GLuint *_ptr = (GLuint *)radeon_ptr32(rrb, _x, _y);         \
    GLuint tmp = *_ptr;                                         \
    tmp &= 0x00ffffff;                                          \
    tmp |= (((d) & 0xff) << 24);                                \
    *_ptr = tmp;                                                \
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y )                               \
do {                                                            \
    GLuint *_ptr = (GLuint *)radeon_ptr32(rrb, _x, _y);         \
    GLuint tmp = *_ptr;                                         \
    d = tmp & 0x000000ff;                                       \
} while (0)
#else
#define READ_STENCIL( d, _x, _y )                               \
do {                                                            \
    GLuint *_ptr = (GLuint *)radeon_ptr32(rrb, _x, _y);         \
    GLuint tmp = *_ptr;                                         \
    d = (tmp & 0xff000000) >> 24;                               \
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"

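/* Map the buffer object backing a renderbuffer into CPU address space so
 * the span functions can read and write its pixels directly.
 */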
static void map_buffer(struct gl_renderbuffer *rb, GLboolean write)
{
    struct radeon_renderbuffer *rrb = (void *)rb;
    int r;

    if (rrb->bo) {
        r = radeon_bo_map(rrb->bo, write);
        if (r) {
            fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
                    __FUNCTION__, r);
        }
    }
}

static void unmap_buffer(struct gl_renderbuffer *rb)
{
    struct radeon_renderbuffer *rrb = (void *)rb;

    if (rrb->bo) {
        radeon_bo_unmap(rrb->bo);
    }
}

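/* Called by swrast before it renders a batch of spans: flush our own
 * rendering, map every texture and renderbuffer software rendering may
 * touch, then grab the hardware lock and wait for the GPU to go idle so
 * that CPU access is safe.
 */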
static void radeonSpanRenderStart(GLcontext *ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    radeon_firevertices(rmesa);

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    /* color draw buffers */
    for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
        map_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i], GL_TRUE);
    }

    map_buffer(ctx->ReadBuffer->_ColorReadBuffer, GL_FALSE);

    if (ctx->DrawBuffer->_DepthBuffer) {
        map_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped, GL_TRUE);
    }
    if (ctx->DrawBuffer->_StencilBuffer)
        map_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped, GL_TRUE);

    /* The locking and wait for idle should really only be needed in classic
     * mode.  In a future memory-manager-based implementation, this should
     * become unnecessary, because mapping our buffers, textures, etc. should
     * implicitly wait for any previous rendering commands that must be
     * waited on.
     */
    LOCK_HARDWARE(rmesa);
    radeonWaitForIdleLocked(rmesa);
}

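/* Counterpart of radeonSpanRenderStart(): flush swrast, release the
 * hardware lock and unmap everything that was mapped for span rendering.
 */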
static void radeonSpanRenderFinish(GLcontext *ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    _swrast_flush(ctx);
    UNLOCK_HARDWARE(rmesa);

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    /* color draw buffers */
    for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++)
        unmap_buffer(ctx->DrawBuffer->_ColorDrawBuffers[i]);

    unmap_buffer(ctx->ReadBuffer->_ColorReadBuffer);

    if (ctx->DrawBuffer->_DepthBuffer)
        unmap_buffer(ctx->DrawBuffer->_DepthBuffer->Wrapped);
    if (ctx->DrawBuffer->_StencilBuffer)
        unmap_buffer(ctx->DrawBuffer->_StencilBuffer->Wrapped);
}

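/* Hook the span-rendering start/finish callbacks into swrast. */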
void radeonInitSpanFuncs(GLcontext *ctx)
{
    struct swrast_device_driver *swdd =
        _swrast_GetDeviceDriverReference(ctx);

    swdd->SpanRenderStart = radeonSpanRenderStart;
    swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put routines for the given radeon_renderbuffer,
 * based on its internal format.
 */
void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
    if (rrb->base.InternalFormat == GL_RGB5) {
        radeonInitPointers_RGB565(&rrb->base);
    } else if (rrb->base.InternalFormat == GL_RGBA8) {
        radeonInitPointers_ARGB8888(&rrb->base);
    } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
        radeonInitDepthPointers_z16(&rrb->base);
    } else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
        radeonInitDepthPointers_z24_s8(&rrb->base);
    } else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
        radeonInitStencilPointers_z24_s8(&rrb->base);
    }
}