Merge branch 'gallium-nopointsizeminmax'
[mesa.git] / src / mesa / drivers / dri / r128 / r128_span.c
1 /**************************************************************************
2
3 Copyright 1999, 2000 ATI Technologies Inc. and Precision Insight, Inc.,
4 Cedar Park, Texas.
5 All Rights Reserved.
6
7 Permission is hereby granted, free of charge, to any person obtaining a
8 copy of this software and associated documentation files (the "Software"),
9 to deal in the Software without restriction, including without limitation
10 on the rights to use, copy, modify, merge, publish, distribute, sub
11 license, and/or sell copies of the Software, and to permit persons to whom
12 the Software is furnished to do so, subject to the following conditions:
13
14 The above copyright notice and this permission notice (including the next
15 paragraph) shall be included in all copies or substantial portions of the
16 Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 ATI, PRECISION INSIGHT AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Gareth Hughes <gareth@valinux.com>
31 * Keith Whitwell <keith@tungstengraphics.com>
32 * Kevin E. Martin <martin@valinux.com>
33 *
34 */
35
36 #include "r128_context.h"
37 #include "r128_ioctl.h"
38 #include "r128_span.h"
39
40 #include "swrast/swrast.h"
41
42 #define DBG 0
43
44 #define HAVE_HW_DEPTH_SPANS 1
45 #define HAVE_HW_DEPTH_PIXELS 1
46 #define HAVE_HW_STENCIL_SPANS 1
47 #define HAVE_HW_STENCIL_PIXELS 1
48
/*
 * Locals declared at the top of every color span function generated below.
 * Expects `ctx` and `rb` to be in scope at the expansion site.  From them it
 * derives the driver context, the DRI screen and drawable, the renderbuffer
 * viewed as a driRenderbuffer, and the drawable height.  `p` is declared and
 * immediately voided so an expansion that never reads it does not warn.
 */
#define LOCAL_VARS \
   driRenderbuffer *drb = (driRenderbuffer *) rb; \
   r128ContextPtr rmesa = R128_CONTEXT(ctx); \
   __DRIdrawable *dPriv = rmesa->driDrawable; \
   __DRIscreen *sPriv = rmesa->driScreen; \
   GLuint height = dPriv->h; \
   GLuint p; \
   (void) p;
57
/*
 * Locals declared at the top of every depth span function generated below.
 * Expects `ctx` to be in scope at the expansion site.  The trailing casts
 * mark the values that an expansion may leave unread as used.  The macro
 * itself does not end in a semicolon.
 */
#define LOCAL_DEPTH_VARS \
   r128ContextPtr rmesa = R128_CONTEXT(ctx); \
   __DRIscreen *sPriv = rmesa->driScreen; \
   __DRIdrawable *dPriv = rmesa->driDrawable; \
   r128ScreenPtr r128scrn = rmesa->r128Screen; \
   GLuint height = dPriv->h; \
   (void) r128scrn; \
   (void) sPriv; \
   (void) height

/* The stencil span functions use exactly the same set of locals. */
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
67
/*
 * Mirror a row index within the drawable: row 0 maps to height - 1 and
 * row height - 1 maps to 0.  Relies on a `height` variable being in scope
 * at the expansion site.  The argument is parenthesized so that a compound
 * expression such as `a - b` is flipped as a whole.
 */
#define Y_FLIP( _y ) (height - (_y) - 1)

/*
 * Lock hooks.  They expand to nothing for the color span functions
 * instantiated next, and are redefined further down in this file for the
 * depth and stencil span functions.
 */
#define HW_LOCK()

#define HW_UNLOCK()
73
74
75
76 /* ================================================================
77 * Color buffer
78 */
79
80 /* 16 bit, RGB565 color spanline and pixel functions
81 */
82 #define SPANTMP_PIXEL_FMT GL_RGB
83 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
84
85 #define TAG(x) r128##x##_RGB565
86 #define TAG2(x,y) r128##x##_RGB565##y
87 #define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset \
88 + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
89 #include "spantmp2.h"
90
91
92 /* 32 bit, ARGB8888 color spanline and pixel functions
93 */
94 #define SPANTMP_PIXEL_FMT GL_BGRA
95 #define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
96
97 #define TAG(x) r128##x##_ARGB8888
98 #define TAG2(x,y) r128##x##_ARGB8888##y
99 #define GET_PTR(X,Y) (sPriv->pFB + drb->flippedOffset \
100 + ((dPriv->y + (Y)) * drb->flippedPitch + (dPriv->x + (X))) * drb->cpp)
101 #include "spantmp2.h"
102
/* Idling in the depth/stencil span functions:
 * For writes, the kernel reads from the given user-space buffer at dispatch
 * time, and then writes to the depth buffer asynchronously.
 * For reads, the kernel reads from the depth buffer and writes to the span
 * temporary asynchronously.
 * So, if we're going to read from the span temporary, we need to idle before
 * doing so.  But we don't need to idle after a write, because the CPU won't
 * be accessing the destination, only the accelerator (through 3D rendering or
 * depth span reads).
 * However, due to interactions in the pixel cache between 2D (what we do with
 * depth) and 3D (all other parts of the system), we idle at the beginning and
 * end of a set of span operations, which should cover the pixel cache issue.
 * Except, we still have major issues, as shown by no_rast=true glxgears, or
 * stencilwrap.
 */
118
119 /* ================================================================
120 * Depth buffer
121 */
122
/* These functions require locking: from here on the lock hooks take and
 * release the hardware lock for the current context.  The trailing
 * semicolons are part of the expansions.
 */
#undef HW_LOCK
#undef HW_UNLOCK
#define HW_LOCK()   LOCK_HARDWARE(R128_CONTEXT(ctx));
#define HW_UNLOCK() UNLOCK_HARDWARE(R128_CONTEXT(ctx));

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

/* Hand a span of n depth values to r128WriteDepthSpanLocked, with the start
 * position (x, y) offset by the drawable's origin.  Uses `rmesa`, `dPriv`,
 * `n`, `x`, `y`, `depth` and `mask` from the expansion site.
 */
#define WRITE_DEPTH_SPAN() \
do { \
   r128WriteDepthSpanLocked( rmesa, n, \
                             dPriv->x + x, \
                             dPriv->y + y, \
                             depth, mask ); \
} while (0)
140
/* Write n scattered 16-bit depth values.  Each (x[i], y[i]) is converted by
 * flipping the row with Y_FLIP and adding the drawable's origin, then the
 * whole batch is handed to r128WriteDepthPixelsLocked.  The loop index `i`
 * is not declared here and must be provided by the expansion site.
 * NOTE(review): the coordinate arrays hold MAX_WIDTH entries and n is not
 * checked against that here -- confirm callers never exceed it.
 */
#define WRITE_DEPTH_PIXELS() \
do { \
   GLint px[MAX_WIDTH]; \
   GLint py[MAX_WIDTH]; \
   for ( i = 0 ; i < n ; i++ ) { \
      px[i] = dPriv->x + x[i]; \
      py[i] = dPriv->y + Y_FLIP( y[i] ); \
   } \
   r128WriteDepthPixelsLocked( rmesa, n, px, py, depth, mask ); \
} while (0)
151
/* Read a span of n 16-bit depth values into depth[].  The read request
 * delivers its result to the span temporary located at
 * sPriv->pFB + r128scrn->spanOffset; we wait for the engine to go idle
 * before copying the values out of it.
 */
#define READ_DEPTH_SPAN() \
do { \
   const GLushort *spanbuf = (GLushort *)((GLubyte *)sPriv->pFB + \
                                          r128scrn->spanOffset); \
   GLint k; \
 \
   r128ReadDepthSpanLocked( rmesa, n, \
                            dPriv->x + x, \
                            dPriv->y + y ); \
   r128WaitForIdleLocked( rmesa ); \
 \
   for ( k = 0 ; k < n ; k++ ) { \
      depth[k] = spanbuf[k]; \
   } \
} while (0)
167
168 #define READ_DEPTH_PIXELS() \
169 do { \
170 GLushort *buf = (GLushort *)((GLubyte *)sPriv->pFB + \
171 r128scrn->spanOffset); \
172 GLint i, remaining = n; \
173 \
174 while ( remaining > 0 ) { \
175 GLint ox[128]; \
176 GLint oy[128]; \
177 GLint count; \
178 \
179 if ( remaining <= 128 ) { \
180 count = remaining; \
181 } else { \
182 count = 128; \
183 } \
184 for ( i = 0 ; i < count ; i++ ) { \
185 ox[i] = x[i] + dPriv->x; \
186 oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
187 } \
188 \
189 r128ReadDepthPixelsLocked( rmesa, count, ox, oy ); \
190 r128WaitForIdleLocked( rmesa ); \
191 \
192 for ( i = 0 ; i < count ; i++ ) { \
193 depth[i] = buf[i]; \
194 } \
195 depth += count; \
196 x += count; \
197 y += count; \
198 remaining -= count; \
199 } \
200 } while (0)
201
202 #define TAG(x) r128##x##_z16
203 #include "depthtmp.h"
204
205
/* 24-bit depth, 8-bit stencil buffer functions
 */
#define VALUE_TYPE GLuint

/* Write a span of n 24-bit depth values without disturbing the stencil
 * byte that shares each 32-bit word: read the current words back through
 * the span temporary, keep their top 8 bits, substitute the low 24 bits
 * from depth[], and write the merged words out.
 */
#define WRITE_DEPTH_SPAN() \
do { \
   GLuint merged[n]; \
   GLint k; \
   const GLuint *spanbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                      r128scrn->spanOffset); \
   r128ReadDepthSpanLocked( rmesa, n, \
                            dPriv->x + x, \
                            dPriv->y + y ); \
   r128WaitForIdleLocked( rmesa ); \
   for ( k = 0 ; k < n ; k++ ) { \
      merged[k] = (depth[k] & 0x00ffffff) | (spanbuf[k] & 0xff000000); \
   } \
   r128WriteDepthSpanLocked( rmesa, n, \
                             dPriv->x + x, \
                             dPriv->y + y, \
                             merged, mask ); \
} while (0)
228
/* Write n scattered 24-bit depth values without disturbing the stencil byte
 * that shares each 32-bit word: read the current words back through the
 * span temporary, keep their top 8 bits, substitute the low 24 bits from
 * depth[], and write the merged words out.  The loop index `i` is not
 * declared here and must be provided by the expansion site.
 * NOTE(review): unlike READ_DEPTH_PIXELS this issues a single read for all
 * n pixels rather than batches of 128 -- confirm that is intended.
 */
#define WRITE_DEPTH_PIXELS() \
do { \
   GLuint merged[n]; \
   GLint px[MAX_WIDTH]; \
   GLint py[MAX_WIDTH]; \
   const GLuint *spanbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                      r128scrn->spanOffset); \
   for ( i = 0 ; i < n ; i++ ) { \
      px[i] = dPriv->x + x[i]; \
      py[i] = dPriv->y + Y_FLIP( y[i] ); \
   } \
   r128ReadDepthPixelsLocked( rmesa, n, px, py ); \
   r128WaitForIdleLocked( rmesa ); \
   for ( i = 0 ; i < n ; i++ ) { \
      merged[i] = (depth[i] & 0x00ffffff) | (spanbuf[i] & 0xff000000); \
   } \
   r128WriteDepthPixelsLocked( rmesa, n, px, py, merged, mask ); \
} while (0)
247
/* Read a span of n depth values into depth[].  The words arrive in the span
 * temporary (sPriv->pFB + r128scrn->spanOffset); after waiting for idle,
 * only the low 24 bits of each word are returned, dropping the stencil
 * byte.
 */
#define READ_DEPTH_SPAN() \
do { \
   const GLuint *spanbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                      r128scrn->spanOffset); \
   GLint k; \
 \
   r128ReadDepthSpanLocked( rmesa, n, \
                            dPriv->x + x, \
                            dPriv->y + y ); \
   r128WaitForIdleLocked( rmesa ); \
 \
   for ( k = 0 ; k < n ; k++ ) { \
      depth[k] = spanbuf[k] & 0x00ffffff; \
   } \
} while (0)
264
265 #define READ_DEPTH_PIXELS() \
266 do { \
267 GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB + \
268 r128scrn->spanOffset); \
269 GLint i, remaining = n; \
270 \
271 while ( remaining > 0 ) { \
272 GLint ox[128]; \
273 GLint oy[128]; \
274 GLint count; \
275 \
276 if ( remaining <= 128 ) { \
277 count = remaining; \
278 } else { \
279 count = 128; \
280 } \
281 for ( i = 0 ; i < count ; i++ ) { \
282 ox[i] = x[i] + dPriv->x; \
283 oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
284 } \
285 \
286 r128ReadDepthPixelsLocked( rmesa, count, ox, oy ); \
287 r128WaitForIdleLocked( rmesa ); \
288 \
289 for ( i = 0 ; i < count ; i++ ) { \
290 depth[i] = buf[i] & 0x00ffffff; \
291 } \
292 depth += count; \
293 x += count; \
294 y += count; \
295 remaining -= count; \
296 } \
297 } while (0)
298
299 #define TAG(x) r128##x##_z24_s8
300 #include "depthtmp.h"
301
302
303
304 /* ================================================================
305 * Stencil buffer
306 */
307
/* 24 bit depth, 8 bit stencil depthbuffer functions
 */

/* Write a span of n stencil values without disturbing the 24 depth bits
 * that share each 32-bit word: read the current words back through the span
 * temporary (sPriv->pFB + r128scrn->spanOffset), keep their low 24 bits,
 * place the stencil value in the top 8 bits, and write the merged words
 * out.
 *
 * The stencil value is converted to GLuint before shifting: shifting a
 * promoted int left by 24 moves values >= 0x80 into the sign bit, which is
 * undefined behaviour.
 */
#define WRITE_STENCIL_SPAN() \
do { \
   GLuint buf[n]; \
   GLint i; \
   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                r128scrn->spanOffset); \
   r128ReadDepthSpanLocked( rmesa, n, \
                            x + dPriv->x, \
                            y + dPriv->y ); \
   r128WaitForIdleLocked( rmesa ); \
   for ( i = 0 ; i < n ; i++ ) { \
      buf[i] = (readbuf[i] & 0x00ffffff) | ((GLuint) stencil[i] << 24); \
   } \
   r128WriteDepthSpanLocked( rmesa, n, \
                             x + dPriv->x, \
                             y + dPriv->y, \
                             buf, mask ); \
} while (0)
328
/* Write n scattered stencil values without disturbing the 24 depth bits
 * that share each 32-bit word.  Each (x[i], y[i]) is converted by flipping
 * the row with Y_FLIP and adding the drawable's origin; the current words
 * are read back through the span temporary, their low 24 bits are kept, the
 * stencil value is placed in the top 8 bits, and the merged words are
 * written out.  The loop index `i` is not declared here and must be
 * provided by the expansion site.
 *
 * The stencil value is converted to GLuint before shifting: shifting a
 * promoted int left by 24 moves values >= 0x80 into the sign bit, which is
 * undefined behaviour.
 */
#define WRITE_STENCIL_PIXELS() \
do { \
   GLuint buf[n]; \
   GLint ox[MAX_WIDTH]; \
   GLint oy[MAX_WIDTH]; \
   GLuint *readbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                r128scrn->spanOffset); \
   for ( i = 0 ; i < n ; i++ ) { \
      ox[i] = x[i] + dPriv->x; \
      oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
   } \
   r128ReadDepthPixelsLocked( rmesa, n, ox, oy ); \
   r128WaitForIdleLocked( rmesa ); \
   for ( i = 0 ; i < n ; i++ ) { \
      buf[i] = (readbuf[i] & 0x00ffffff) | ((GLuint) stencil[i] << 24); \
   } \
   r128WriteDepthPixelsLocked( rmesa, n, ox, oy, buf, mask ); \
} while (0)
347
/* Read a span of n stencil values into stencil[].  The combined
 * depth/stencil words arrive in the span temporary
 * (sPriv->pFB + r128scrn->spanOffset); after waiting for idle, the top
 * 8 bits of each word are returned.
 */
#define READ_STENCIL_SPAN() \
do { \
   const GLuint *spanbuf = (GLuint *)((GLubyte *)sPriv->pFB + \
                                      r128scrn->spanOffset); \
   GLint k; \
 \
   r128ReadDepthSpanLocked( rmesa, n, \
                            dPriv->x + x, \
                            dPriv->y + y ); \
   r128WaitForIdleLocked( rmesa ); \
 \
   for ( k = 0 ; k < n ; k++ ) { \
      stencil[k] = (spanbuf[k] & 0xff000000) >> 24; \
   } \
} while (0)
364
365 #define READ_STENCIL_PIXELS() \
366 do { \
367 GLuint *buf = (GLuint *)((GLubyte *)sPriv->pFB + \
368 r128scrn->spanOffset); \
369 GLint i, remaining = n; \
370 \
371 while ( remaining > 0 ) { \
372 GLint ox[128]; \
373 GLint oy[128]; \
374 GLint count; \
375 \
376 if ( remaining <= 128 ) { \
377 count = remaining; \
378 } else { \
379 count = 128; \
380 } \
381 for ( i = 0 ; i < count ; i++ ) { \
382 ox[i] = x[i] + dPriv->x; \
383 oy[i] = Y_FLIP( y[i] ) + dPriv->y; \
384 } \
385 \
386 r128ReadDepthPixelsLocked( rmesa, count, ox, oy ); \
387 r128WaitForIdleLocked( rmesa ); \
388 \
389 for ( i = 0 ; i < count ; i++ ) { \
390 stencil[i] = (buf[i] & 0xff000000) >> 24; \
391 } \
392 stencil += count; \
393 x += count; \
394 y += count; \
395 remaining -= count; \
396 } \
397 } while (0)
398
399 #define TAG(x) radeon##x##_z24_s8
400 #include "stenciltmp.h"
401
402 static void
403 r128SpanRenderStart( GLcontext *ctx )
404 {
405 r128ContextPtr rmesa = R128_CONTEXT(ctx);
406 FLUSH_BATCH(rmesa);
407 LOCK_HARDWARE(rmesa);
408 r128WaitForIdleLocked( rmesa );
409 }
410
411 static void
412 r128SpanRenderFinish( GLcontext *ctx )
413 {
414 r128ContextPtr rmesa = R128_CONTEXT(ctx);
415 _swrast_flush( ctx );
416 r128WaitForIdleLocked( rmesa );
417 UNLOCK_HARDWARE( rmesa );
418 }
419
420 void r128DDInitSpanFuncs( GLcontext *ctx )
421 {
422 struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
423 swdd->SpanRenderStart = r128SpanRenderStart;
424 swdd->SpanRenderFinish = r128SpanRenderFinish;
425 }
426
427
428 /**
429 * Plug in the Get/Put routines for the given driRenderbuffer.
430 */
431 void
432 r128SetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
433 {
434 if (drb->Base.Format == MESA_FORMAT_RGB565) {
435 r128InitPointers_RGB565(&drb->Base);
436 }
437 else if (drb->Base.Format == MESA_FORMAT_ARGB8888) {
438 r128InitPointers_ARGB8888(&drb->Base);
439 }
440 else if (drb->Base.Format == MESA_FORMAT_Z16) {
441 r128InitDepthPointers_z16(&drb->Base);
442 }
443 else if (drb->Base.Format == MESA_FORMAT_S8_Z24) {
444 r128InitDepthPointers_z24_s8(&drb->Base);
445 }
446 else if (drb->Base.Format == MESA_FORMAT_S8) {
447 radeonInitStencilPointers_z24_s8(&drb->Base);
448 }
449 }