Merge branch 'mesa_7_7_branch'
[mesa.git] / src / mesa / drivers / dri / r128 / r128_span.c
1 /**************************************************************************
2
3 Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
4 Cedar Park, Texas.
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Gareth Hughes <gareth@valinux.com>
31 * Keith Whitwell <keith@tungstengraphics.com>
32 * Kevin E. Martin <martin@valinux.com>
33 *
34 */
35
36 #include "r128_context.h"
37 #include "r128_ioctl.h"
38 #include "r128_state.h"
39 #include "r128_span.h"
40 #include "r128_tex.h"
41
42 #include "swrast/swrast.h"
43
/* Debug switch; 0 here.  Presumably consulted by the included span
 * templates -- confirm against the template headers. */
#define DBG                      0

/*
 * Feature flags, all enabled.  Presumably they tell depthtmp.h and
 * stenciltmp.h to use the *_SPAN() / *_PIXELS() macros defined further
 * down in this file -- confirm against the template headers.
 */
#define HAVE_HW_DEPTH_SPANS      1
#define HAVE_HW_DEPTH_PIXELS     1
#define HAVE_HW_STENCIL_SPANS    1
#define HAVE_HW_STENCIL_PIXELS   1
50
/*
 * Local declarations for the color-buffer span templates.
 *
 * Requires `ctx` and `rb` to be in scope where the macro is expanded.
 * Declares rmesa, sPriv, dPriv, drb, height and the scratch variable p
 * (voided so that it does not trigger an unused-variable warning).
 * Note that the expansion already ends with a semicolon.
 */
#define LOCAL_VARS                                                      \
   r128ContextPtr rmesa = R128_CONTEXT(ctx);                            \
   __DRIscreen *sPriv = rmesa->driScreen;                               \
   __DRIdrawable *dPriv = rmesa->driDrawable;                           \
   driRenderbuffer *drb = (driRenderbuffer *) rb;                       \
   GLuint height = dPriv->h;                                            \
   GLuint p;                                                            \
   (void) p;
59
/*
 * Local declarations for the depth span templates.
 *
 * Requires `ctx` to be in scope.  Declares rmesa, r128scrn, sPriv, dPriv
 * and height; r128scrn, sPriv and height are voided because not every
 * expansion uses them.  Unlike LOCAL_VARS, the expansion does NOT end
 * with a semicolon.
 */
#define LOCAL_DEPTH_VARS                                                \
   r128ContextPtr rmesa = R128_CONTEXT(ctx);                            \
   r128ScreenPtr r128scrn = rmesa->r128Screen;                          \
   __DRIscreen *sPriv = rmesa->driScreen;                               \
   __DRIdrawable *dPriv = rmesa->driDrawable;                           \
   GLuint height = dPriv->h;                                            \
   (void) r128scrn; (void) sPriv; (void) height

/* The stencil templates use exactly the same locals as the depth ones. */
#define LOCAL_STENCIL_VARS   LOCAL_DEPTH_VARS
69
/*
 * Mirror a row index vertically: row 0 maps to height - 1 and vice versa.
 * Relies on a variable named `height` being in scope at the point of use
 * (LOCAL_VARS and LOCAL_DEPTH_VARS both declare one).
 *
 * The argument is parenthesized so that compound expressions expand
 * correctly, e.g. Y_FLIP(a - b) is height - (a - b) - 1 rather than
 * height - a - b - 1.
 */
#define Y_FLIP( _y )    (height - (_y) - 1)

/*
 * Lock hooks for the span templates.  They expand to nothing here, so any
 * template included before they are redefined takes no hardware lock
 * through these hooks.
 */
#define HW_LOCK()

#define HW_UNLOCK()
75
76
77
78 /* ================================================================
79 * Color buffer
80 */
81
82 /* 16 bit, RGB565 color spanline and pixel functions
83 */
84 #define SPANTMP_PIXEL_FMT GL_RGB
85 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
86
87 #define TAG(x) r128##x##_RGB565
88 #define TAG2(x,y) r128##x##_RGB565##y
89 #define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset \
90 + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
91 #include "spantmp2.h"
92
93
94 /* 32 bit, ARGB8888 color spanline and pixel functions
95 */
96 #define SPANTMP_PIXEL_FMT GL_BGRA
97 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
98
99 #define TAG(x) r128##x##_ARGB8888
100 #define TAG2(x,y) r128##x##_ARGB8888##y
101 #define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset \
102 + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
103 #include "spantmp2.h"
104
105 /* Idling in the depth/stencil span functions:
106 * For writes, the kernel reads from the given user-space buffer at dispatch
107 * time, and then writes to the depth buffer asynchronously.
108 * For reads, the kernel reads from the depth buffer and writes to the span
109 * temporary asynchronously.
110 * So, if we're going to read from the span temporary, we need to idle before
111 * doing so. But we don't need to idle after write, because the CPU won't
112 * be accessing the destination, only the accelerator (through 3d rendering or
113 * depth span reads)
114 * However, due to interactions from pixel cache between 2d (what we do with
115 * depth) and 3d (all other parts of the system), we idle at the beginning and end
116 * of a set of span operations, which should cover the pixel cache issue.
117 * Except, we still have major issues, as shown by no_rast=true glxgears, or
118 * stencilwrap.
119 */
120
121 /* ================================================================
122 * Depth buffer
123 */
124
/*
 * The depth/stencil functions below require locking, so the lock hooks are
 * redefined to take and release the hardware lock on the current context.
 * Both expansions carry their own trailing semicolon.
 */
#undef HW_LOCK
#undef HW_UNLOCK
#define HW_LOCK()      LOCK_HARDWARE(R128_CONTEXT(ctx));
#define HW_UNLOCK()    UNLOCK_HARDWARE(R128_CONTEXT(ctx));
130
/*
 * 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

/*
 * Write a horizontal run of `n` depth values starting at (x, y).
 * The drawable origin is added to the start coordinate; `depth` and `mask`
 * are handed to r128WriteDepthSpanLocked() unchanged.  No Y flip is done
 * inside this macro.
 */
#define WRITE_DEPTH_SPAN()                                              \
   do {                                                                 \
      r128WriteDepthSpanLocked( rmesa, n,                               \
                                dPriv->x + x,                           \
                                dPriv->y + y,                           \
                                depth, mask );                          \
   } while (0)
142
/*
 * Write `n` scattered depth values.  Each (x[i], y[i]) is turned into a
 * screen coordinate (Y flipped, drawable origin added) before the whole
 * batch is passed to r128WriteDepthPixelsLocked().
 * Uses the loop counter `i` of the enclosing scope; the coordinate arrays
 * hold MAX_WIDTH entries, so n is assumed not to exceed MAX_WIDTH.
 */
#define WRITE_DEPTH_PIXELS()                                            \
   do {                                                                 \
      GLint win_x[MAX_WIDTH];                                           \
      GLint win_y[MAX_WIDTH];                                           \
      for ( i = 0 ; i < n ; i++ ) {                                     \
         win_x[i] = dPriv->x + x[i];                                    \
         win_y[i] = dPriv->y + Y_FLIP( y[i] );                          \
      }                                                                 \
      r128WriteDepthPixelsLocked( rmesa, n, win_x, win_y, depth, mask ); \
   } while (0)
153
/*
 * Read a horizontal run of `n` 16-bit depth values starting at (x, y).
 * The read is issued with r128ReadDepthSpanLocked(), the engine is idled,
 * and the values are then copied out of the span temporary located at
 * sPriv->pFB + r128scrn->spanOffset into depth[].
 */
#define READ_DEPTH_SPAN()                                               \
   do {                                                                 \
      GLushort *span_tmp = (GLushort *)((GLubyte *)sPriv->pFB +         \
                                        r128scrn->spanOffset);          \
      GLint k;                                                          \
                                                                        \
      r128ReadDepthSpanLocked( rmesa, n,                                \
                               dPriv->x + x,                            \
                               dPriv->y + y );                          \
      r128WaitForIdleLocked( rmesa );                                   \
                                                                        \
      for ( k = 0 ; k < n ; k++ ) {                                     \
         depth[k] = span_tmp[k];                                        \
      }                                                                 \
   } while (0)
169
/*
 * Read `n` scattered 16-bit depth values, at most 128 per request.
 * For every batch the coordinates are converted to screen space (Y flipped,
 * drawable origin added), the read is issued, the engine is idled and the
 * results are copied from the span temporary into depth[].
 * The macro advances the caller's depth, x and y pointers as it goes.
 */
#define READ_DEPTH_PIXELS()                                             \
   do {                                                                 \
      GLushort *span_tmp = (GLushort *)((GLubyte *)sPriv->pFB +         \
                                        r128scrn->spanOffset);          \
      GLint k, left;                                                    \
                                                                        \
      for ( left = n ; left > 0 ; ) {                                   \
         GLint win_x[128];                                              \
         GLint win_y[128];                                              \
         const GLint batch = ( left > 128 ) ? 128 : left;               \
                                                                        \
         for ( k = 0 ; k < batch ; k++ ) {                              \
            win_x[k] = dPriv->x + x[k];                                 \
            win_y[k] = dPriv->y + Y_FLIP( y[k] );                       \
         }                                                              \
                                                                        \
         r128ReadDepthPixelsLocked( rmesa, batch, win_x, win_y );       \
         r128WaitForIdleLocked( rmesa );                                \
                                                                        \
         for ( k = 0 ; k < batch ; k++ ) {                              \
            depth[k] = span_tmp[k];                                     \
         }                                                              \
         depth += batch;                                                \
         x += batch;                                                    \
         y += batch;                                                    \
         left -= batch;                                                 \
      }                                                                 \
   } while (0)
203
204 #define TAG(x) r128##x##_z16
205 #include "depthtmp.h"
206
207
/*
 * 24-bit depth, 8-bit stencil buffer functions
 */
#define VALUE_TYPE GLuint

/*
 * Write a horizontal run of `n` 24-bit depth values starting at (x, y)
 * without disturbing the top 8 bits of each stored 32-bit value:
 * the current contents are read back into the span temporary, the low
 * 24 bits are replaced by depth[], and the merged values are written out.
 */
#define WRITE_DEPTH_SPAN()                                              \
   do {                                                                 \
      GLuint merged[n];                                                 \
      GLint k;                                                          \
      GLuint *span_tmp = (GLuint *)((GLubyte *)sPriv->pFB +             \
                                    r128scrn->spanOffset);              \
      r128ReadDepthSpanLocked( rmesa, n,                                \
                               dPriv->x + x,                            \
                               dPriv->y + y );                          \
      r128WaitForIdleLocked( rmesa );                                   \
      for ( k = 0 ; k < n ; k++ ) {                                     \
         merged[k] = (depth[k] & 0x00ffffff) |                          \
                     (span_tmp[k] & 0xff000000);                        \
      }                                                                 \
      r128WriteDepthSpanLocked( rmesa, n,                               \
                                dPriv->x + x,                           \
                                dPriv->y + y,                           \
                                merged, mask );                         \
   } while (0)
230
/*
 * Write `n` scattered 24-bit depth values while preserving the top 8 bits
 * of each stored 32-bit value (read back, merge, write out).
 * Uses the loop counter `i` of the enclosing scope; the coordinate arrays
 * hold MAX_WIDTH entries, so n is assumed not to exceed MAX_WIDTH.
 */
#define WRITE_DEPTH_PIXELS()                                            \
   do {                                                                 \
      GLuint merged[n];                                                 \
      GLint win_x[MAX_WIDTH];                                           \
      GLint win_y[MAX_WIDTH];                                           \
      GLuint *span_tmp = (GLuint *)((GLubyte *)sPriv->pFB +             \
                                    r128scrn->spanOffset);              \
      for ( i = 0 ; i < n ; i++ ) {                                     \
         win_x[i] = dPriv->x + x[i];                                    \
         win_y[i] = dPriv->y + Y_FLIP( y[i] );                          \
      }                                                                 \
      r128ReadDepthPixelsLocked( rmesa, n, win_x, win_y );              \
      r128WaitForIdleLocked( rmesa );                                   \
      for ( i = 0 ; i < n ; i++ ) {                                     \
         merged[i] = (depth[i] & 0x00ffffff) |                          \
                     (span_tmp[i] & 0xff000000);                        \
      }                                                                 \
      r128WriteDepthPixelsLocked( rmesa, n, win_x, win_y, merged, mask ); \
   } while (0)
249
/*
 * Read a horizontal run of `n` depth values starting at (x, y).
 * After the read request and the idle wait, the low 24 bits of every
 * 32-bit word in the span temporary are stored into depth[].
 */
#define READ_DEPTH_SPAN()                                               \
   do {                                                                 \
      GLuint *span_tmp = (GLuint *)((GLubyte *)sPriv->pFB +             \
                                    r128scrn->spanOffset);              \
      GLint k;                                                          \
                                                                        \
      r128ReadDepthSpanLocked( rmesa, n,                                \
                               dPriv->x + x,                            \
                               dPriv->y + y );                          \
      r128WaitForIdleLocked( rmesa );                                   \
                                                                        \
      for ( k = 0 ; k < n ; k++ ) {                                     \
         depth[k] = 0x00ffffff & span_tmp[k];                           \
      }                                                                 \
   } while (0)
266
/*
 * Read `n` scattered depth values, at most 128 per request.
 * For every batch the coordinates are converted to screen space (Y flipped,
 * drawable origin added), the read is issued, the engine is idled and the
 * low 24 bits of each word in the span temporary are stored into depth[].
 * The macro advances the caller's depth, x and y pointers as it goes.
 */
#define READ_DEPTH_PIXELS()                                             \
   do {                                                                 \
      GLuint *span_tmp = (GLuint *)((GLubyte *)sPriv->pFB +             \
                                    r128scrn->spanOffset);              \
      GLint k, left;                                                    \
                                                                        \
      for ( left = n ; left > 0 ; ) {                                   \
         GLint win_x[128];                                              \
         GLint win_y[128];                                              \
         const GLint batch = ( left > 128 ) ? 128 : left;               \
                                                                        \
         for ( k = 0 ; k < batch ; k++ ) {                              \
            win_x[k] = dPriv->x + x[k];                                 \
            win_y[k] = dPriv->y + Y_FLIP( y[k] );                       \
         }                                                              \
                                                                        \
         r128ReadDepthPixelsLocked( rmesa, batch, win_x, win_y );       \
         r128WaitForIdleLocked( rmesa );                                \
                                                                        \
         for ( k = 0 ; k < batch ; k++ ) {                              \
            depth[k] = 0x00ffffff & span_tmp[k];                        \
         }                                                              \
         depth += batch;                                                \
         x += batch;                                                    \
         y += batch;                                                    \
         left -= batch;                                                 \
      }                                                                 \
   } while (0)
300
301 #define TAG(x) r128##x##_z24_s8
302 #include "depthtmp.h"
303
304
305
306 /* ================================================================
307 * Stencil buffer
308 */
309
/*
 * 24 bit depth, 8 bit stencil depthbuffer functions
 */

/*
 * Write a horizontal run of `n` stencil values starting at (x, y).
 *
 * Stencil occupies the top 8 bits of each 32-bit value, so this is a
 * read-modify-write: the current contents are read back into the span
 * temporary (sPriv->pFB + r128scrn->spanOffset), the engine is idled, the
 * low 24 bits are kept and the top byte is replaced by stencil[], and the
 * merged values are written out with the caller's `mask`.
 *
 * stencil[i] is converted to GLuint BEFORE the shift.  If stencil[] holds a
 * narrow type it would otherwise be promoted to signed int, and shifting a
 * value >= 0x80 left by 24 would overflow that int (undefined behavior).
 */
#define WRITE_STENCIL_SPAN()                                            \
   do {                                                                 \
      GLuint buf[n];                                                    \
      GLint i;                                                          \
      GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB +              \
                                   r128scrn->spanOffset);               \
      r128ReadDepthSpanLocked( rmesa, n,                                \
                               x + dPriv->x,                            \
                               y + dPriv->y );                          \
      r128WaitForIdleLocked( rmesa );                                   \
      for ( i = 0 ; i < n ; i++ ) {                                     \
         buf[i] = (readbuf[i] & 0x00ffffff) |                           \
                  ((GLuint) stencil[i] << 24);                          \
      }                                                                 \
      r128WriteDepthSpanLocked( rmesa, n,                               \
                                x + dPriv->x,                           \
                                y + dPriv->y,                           \
                                buf, mask );                            \
   } while (0)
330
/*
 * Write `n` scattered stencil values.
 *
 * Each (x[i], y[i]) is converted to a screen coordinate (Y flipped, drawable
 * origin added).  The current 32-bit values at those positions are read back
 * into the span temporary, the engine is idled, the low 24 bits are kept and
 * the top byte is replaced by stencil[], and the merged values are written
 * out with the caller's `mask`.
 *
 * Uses the loop counter `i` of the enclosing scope; the coordinate arrays
 * hold MAX_WIDTH entries, so n is assumed not to exceed MAX_WIDTH.
 *
 * stencil[i] is converted to GLuint BEFORE the shift.  If stencil[] holds a
 * narrow type it would otherwise be promoted to signed int, and shifting a
 * value >= 0x80 left by 24 would overflow that int (undefined behavior).
 */
#define WRITE_STENCIL_PIXELS()                                          \
   do {                                                                 \
      GLuint buf[n];                                                    \
      GLint ox[MAX_WIDTH];                                              \
      GLint oy[MAX_WIDTH];                                              \
      GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB +              \
                                   r128scrn->spanOffset);               \
      for ( i = 0 ; i < n ; i++ ) {                                     \
         ox[i] = x[i] + dPriv->x;                                       \
         oy[i] = Y_FLIP( y[i] ) + dPriv->y;                             \
      }                                                                 \
      r128ReadDepthPixelsLocked( rmesa, n, ox, oy );                    \
      r128WaitForIdleLocked( rmesa );                                   \
      for ( i = 0 ; i < n ; i++ ) {                                     \
         buf[i] = (readbuf[i] & 0x00ffffff) |                           \
                  ((GLuint) stencil[i] << 24);                          \
      }                                                                 \
      r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask );        \
   } while (0)
349
/*
 * Read a horizontal run of `n` stencil values starting at (x, y).
 * After the read request and the idle wait, the top 8 bits of every
 * 32-bit word in the span temporary are stored into stencil[].
 */
#define READ_STENCIL_SPAN()                                             \
   do {                                                                 \
      GLuint *span_tmp = (GLuint *)((GLubyte *)sPriv->pFB +             \
                                    r128scrn->spanOffset);              \
      GLint k;                                                          \
                                                                        \
      r128ReadDepthSpanLocked( rmesa, n,                                \
                               dPriv->x + x,                            \
                               dPriv->y + y );                          \
      r128WaitForIdleLocked( rmesa );                                   \
                                                                        \
      for ( k = 0 ; k < n ; k++ ) {                                     \
         stencil[k] = (span_tmp[k] >> 24) & 0xff;                       \
      }                                                                 \
   } while (0)
366
/*
 * Read `n` scattered stencil values, at most 128 per request.
 * For every batch the coordinates are converted to screen space (Y flipped,
 * drawable origin added), the read is issued, the engine is idled and the
 * top 8 bits of each word in the span temporary are stored into stencil[].
 * The macro advances the caller's stencil, x and y pointers as it goes.
 */
#define READ_STENCIL_PIXELS()                                           \
   do {                                                                 \
      GLuint *span_tmp = (GLuint *)((GLubyte *)sPriv->pFB +             \
                                    r128scrn->spanOffset);              \
      GLint k, left;                                                    \
                                                                        \
      for ( left = n ; left > 0 ; ) {                                   \
         GLint win_x[128];                                              \
         GLint win_y[128];                                              \
         const GLint batch = ( left > 128 ) ? 128 : left;               \
                                                                        \
         for ( k = 0 ; k < batch ; k++ ) {                              \
            win_x[k] = dPriv->x + x[k];                                 \
            win_y[k] = dPriv->y + Y_FLIP( y[k] );                       \
         }                                                              \
                                                                        \
         r128ReadDepthPixelsLocked( rmesa, batch, win_x, win_y );       \
         r128WaitForIdleLocked( rmesa );                                \
                                                                        \
         for ( k = 0 ; k < batch ; k++ ) {                              \
            stencil[k] = (span_tmp[k] >> 24) & 0xff;                    \
         }                                                              \
         stencil += batch;                                              \
         x += batch;                                                    \
         y += batch;                                                    \
         left -= batch;                                                 \
      }                                                                 \
   } while (0)
400
401 #define TAG(x) radeon##x##_z24_s8
402 #include "stenciltmp.h"
403
404 static void
405 r128SpanRenderStart( GLcontext *ctx )
406 {
407 r128ContextPtr rmesa = R128_CONTEXT(ctx);
408 FLUSH_BATCH(rmesa);
409 LOCK_HARDWARE(rmesa);
410 r128WaitForIdleLocked( rmesa );
411 }
412
413 static void
414 r128SpanRenderFinish( GLcontext *ctx )
415 {
416 r128ContextPtr rmesa = R128_CONTEXT(ctx);
417 _swrast_flush( ctx );
418 r128WaitForIdleLocked( rmesa );
419 UNLOCK_HARDWARE( rmesa );
420 }
421
422 void r128DDInitSpanFuncs( GLcontext *ctx )
423 {
424 struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
425 swdd->SpanRenderStart = r128SpanRenderStart;
426 swdd->SpanRenderFinish = r128SpanRenderFinish;
427 }
428
429
430 /**
431 * Plug in the Get/Put routines for the given driRenderbuffer.
432 */
433 void
434 r128SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
435 {
436 if (drb->Base.Format == MESA_FORMAT_RGB565) {
437 r128InitPointers_RGB565(&drb->Base);
438 }
439 else if (drb->Base.Format == MESA_FORMAT_ARGB8888) {
440 r128InitPointers_ARGB8888(&drb->Base);
441 }
442 else if (drb->Base.Format == MESA_FORMAT_Z16) {
443 r128InitDepthPointers_z16(&drb->Base);
444 }
445 else if (drb->Base.Format == MESA_FORMAT_S8_Z24) {
446 r128InitDepthPointers_z24_s8(&drb->Base);
447 }
448 else if (drb->Base.Format == MESA_FORMAT_S8) {
449 radeonInitStencilPointers_z24_s8(&drb->Base);
450 }
451 }