/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "main/texformat.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && BYTE_ORDER == BIG_ENDIAN
#if defined(__linux__)
#include <byteswap.h>
#define CPU_TO_LE16( x ) bswap_16( x )
#define LE16_TO_CPU( x ) bswap_16( x )
#endif /* __linux__ */
#else
#define CPU_TO_LE16( x ) ( x )
#define LE16_TO_CPU( x ) ( x )
#endif
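
/* Worked example (illustrative, not from the original source): on a
 * big-endian host, storing the RGB565 value 0xf800 (pure red) into the
 * little-endian framebuffer requires a byte swap.  CPU_TO_LE16(0xf800)
 * evaluates to bswap_16(0xf800) == 0x00f8, and a native 16-bit store of
 * 0x00f8 puts the bytes 0x00 0xf8 in memory, i.e. 0xf800 in little-endian
 * order.  On little-endian hosts the macros are no-ops.
 */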

static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);


/* r200 depth buffer is always tiled - this is the formula
   according to the docs unless I typo'ed in it
*/
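/* Illustrative example (my numbers, not from the docs): for a 16bpp depth
 * buffer without a surface register (has_surface == 0) and a pitch of 2048
 * bytes, r200_depth_2byte() maps pixel (x=1, y=1) as follows: b = 0, so only
 * the low-order terms contribute, giving offset = ((1 & 1) << 2) +
 * ((1 & 1) << 1) = 6, whereas the linear layout would give
 * 1 * 2 + 1 * 2048 = 2050.  The has_surface path below is the plain linear
 * x * cpp + y * pitch case.
 */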
#if defined(RADEON_R200)
static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
    GLint offset;
    if (rrb->has_surface) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        GLuint b;
        offset = 0;
        b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6)));
        offset += (b >> 1) << 12;
        offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
        offset += ((y >> 2) & 0x3) << 9;
        offset += ((x >> 3) & 0x1) << 8;
        offset += ((x >> 4) & 0x3) << 6;
        offset += ((x >> 2) & 0x1) << 5;
        offset += ((y >> 1) & 0x1) << 4;
        offset += ((x >> 1) & 0x1) << 3;
        offset += (y & 0x1) << 2;
        offset += (x & 0x1) << 1;
    }
    return &ptr[offset];
}

static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
    GLint offset;
    if (rrb->has_surface) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        GLuint b;
        offset = 0;
        b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
        offset += (b >> 1) << 12;
        offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
        offset += ((y >> 2) & 0x3) << 9;
        offset += ((x >> 2) & 0x1) << 8;
        offset += ((x >> 3) & 0x3) << 6;
        offset += ((y >> 1) & 0x1) << 5;
        offset += ((x >> 1) & 0x1) << 4;
        offset += (y & 0x1) << 3;
        offset += (x & 0x1) << 2;
    }
    return &ptr[offset];
}
#endif

/* r600 tiling
 * two main types:
 * - 1D (akin to macro-linear/micro-tiled on older asics)
 * - 2D (akin to macro-tiled/micro-tiled on older asics)
 */
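/* Illustrative example for the 1D case (numbers are my own, picked to show
 * the arithmetic): with 4-byte color elements, pitch_elements = 256 and
 * height = 64, a tile is 8x8x4 = 256 bytes and there are 32 tiles per row.
 * Pixel (x=9, y=2) lands in tile row 0, tile column 1, so tile_offset = 256;
 * its pixel_number sets bits pn[0] = x[0] and pn[4] = y[1], i.e. 17, so
 * element_offset = 17 * 4 = 68 and the final offset is 256 + 68 = 324 bytes
 * into the buffer.
 */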
#if defined(RADEON_R600)
static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y, GLint is_depth, GLint is_stencil)
{
    GLint element_bytes = rrb->cpp;
    GLint num_samples = 1;
    GLint tile_width = 8;
    GLint tile_height = 8;
    GLint tile_thickness = 1;
    GLint pitch_elements = rrb->pitch / element_bytes;
    GLint height = rrb->base.Height;
    GLint z = 0;
    GLint sample_number = 0;
    /* */
    GLint tile_bytes;
    GLint tiles_per_row;
    GLint tiles_per_slice;
    GLint slice_offset;
    GLint tile_row_index;
    GLint tile_column_index;
    GLint tile_offset;
    GLint pixel_number = 0;
    GLint element_offset;
    GLint offset = 0;

    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
    tiles_per_row = pitch_elements / tile_width;
    tiles_per_slice = tiles_per_row * (height / tile_height);
    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
    tile_row_index = y / tile_height;
    tile_column_index = x / tile_width;
    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;

    if (is_depth) {
        GLint pixel_offset = 0;

        pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
        pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
        pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
        pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
        pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
        pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
        switch (element_bytes) {
        case 2:
            pixel_offset = pixel_number * element_bytes * num_samples;
            break;
        case 4:
            /* stencil and depth data are stored separately within a tile.
             * stencil is stored in a contiguous tile before the depth tile.
             * stencil element is 1 byte, depth element is 3 bytes.
             * stencil tile is 64 bytes.
             */
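            /* e.g. (illustration, not from the docs): for pixel_number == 5,
             * the stencil byte sits at offset 5 into the 64-byte stencil
             * tile, while the 3-byte depth element sits at 64 + 5 * 3 = 79.
             */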
            if (is_stencil)
                pixel_offset = pixel_number * 1 * num_samples;
            else
                pixel_offset = (pixel_number * 3 * num_samples) + 64;
            break;
        }
        element_offset = pixel_offset + (sample_number * element_bytes);
    } else {
        GLint sample_offset;

        switch (element_bytes) {
        case 1:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
            pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
            pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        case 2:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
            pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        case 4:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
            pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        }
        sample_offset = sample_number * (tile_bytes / num_samples);
        element_offset = sample_offset + (pixel_number * element_bytes);
    }
    offset = slice_offset + tile_offset + element_offset;
    return offset;
}

static inline GLint r600_log2(GLint n)
{
    GLint log2 = 0;

    while (n >>= 1)
        ++log2;
    return log2;
}

static inline GLint r600_2d_tile_helper(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y, GLint is_depth, GLint is_stencil)
{
    GLint group_bytes = rrb->group_bytes;
    GLint num_channels = rrb->num_channels;
    GLint num_banks = rrb->num_banks;
    GLint r7xx_bank_op = rrb->r7xx_bank_op;
    /* */
    GLint group_bits = r600_log2(group_bytes);
    GLint channel_bits = r600_log2(num_channels);
    GLint bank_bits = r600_log2(num_banks);
    GLint element_bytes = rrb->cpp;
    GLint num_samples = 1;
    GLint tile_width = 8;
    GLint tile_height = 8;
    GLint tile_thickness = 1;
    GLint macro_tile_width = num_banks;
    GLint macro_tile_height = num_channels;
    GLint pitch_elements = (rrb->pitch / element_bytes) / tile_width;
    GLint height = rrb->base.Height / tile_height;
    GLint z = 0;
    GLint sample_number = 0;
    /* */
    GLint tile_bytes;
    GLint macro_tile_bytes;
    GLint macro_tiles_per_row;
    GLint macro_tiles_per_slice;
    GLint slice_offset;
    GLint macro_tile_row_index;
    GLint macro_tile_column_index;
    GLint macro_tile_offset;
    GLint pixel_number = 0;
    GLint element_offset;
    GLint bank = 0;
    GLint channel = 0;
    GLint total_offset;
    GLint group_mask = (1 << group_bits) - 1;
    GLint offset_low;
    GLint offset_high;
    GLint offset = 0;

    switch (num_channels) {
    case 2:
    default:
        // channel[0] = x[3] ^ y[3]
        channel |= (((x >> 3) ^ (y >> 3)) & 1) << 0;
        break;
    case 4:
        // channel[0] = x[4] ^ y[3]
        channel |= (((x >> 4) ^ (y >> 3)) & 1) << 0;
        // channel[1] = x[3] ^ y[4]
        channel |= (((x >> 3) ^ (y >> 4)) & 1) << 1;
        break;
    case 8:
        // channel[0] = x[5] ^ y[3]
        channel |= (((x >> 5) ^ (y >> 3)) & 1) << 0;
        // channel[1] = x[4] ^ x[5] ^ y[4]
        channel |= (((x >> 4) ^ (x >> 5) ^ (y >> 4)) & 1) << 1;
        // channel[2] = x[3] ^ y[5]
        channel |= (((x >> 3) ^ (y >> 5)) & 1) << 2;
        break;
    }

    switch (num_banks) {
    case 4:
        // bank[0] = x[3] ^ y[4 + log2(num_channels)]
        bank |= (((x >> 3) ^ (y >> (4 + channel_bits))) & 1) << 0;
        if (r7xx_bank_op)
            // bank[1] = x[4] ^ y[3 + log2(num_channels)] ^ x[5]
            bank |= (((x >> 4) ^ (y >> (3 + channel_bits)) ^ (x >> 5)) & 1) << 1;
        else
            // bank[1] = x[4] ^ y[3 + log2(num_channels)]
            bank |= (((x >> 4) ^ (y >> (3 + channel_bits))) & 1) << 1;
        break;
    case 8:
        // bank[0] = x[3] ^ y[5 + log2(num_channels)]
        bank |= (((x >> 3) ^ (y >> (5 + channel_bits))) & 1) << 0;
        // bank[1] = x[4] ^ y[4 + log2(num_channels)] ^ y[5 + log2(num_channels)]
        bank |= (((x >> 4) ^ (y >> (4 + channel_bits)) ^ (y >> (5 + channel_bits))) & 1) << 1;
        if (r7xx_bank_op)
            // bank[2] = x[5] ^ y[3 + log2(num_channels)] ^ x[6]
            bank |= (((x >> 5) ^ (y >> (3 + channel_bits)) ^ (x >> 6)) & 1) << 2;
        else
            // bank[2] = x[5] ^ y[3 + log2(num_channels)]
            bank |= (((x >> 5) ^ (y >> (3 + channel_bits))) & 1) << 2;
        break;
    }

    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
    macro_tile_bytes = macro_tile_width * macro_tile_height * tile_bytes;
    macro_tiles_per_row = pitch_elements / macro_tile_width;
    macro_tiles_per_slice = macro_tiles_per_row * (height / macro_tile_height);
    slice_offset = (z / tile_thickness) * macro_tiles_per_slice * macro_tile_bytes;
    macro_tile_row_index = (y / tile_height) / macro_tile_height;
    macro_tile_column_index = (x / tile_width) / macro_tile_width;
    macro_tile_offset = ((macro_tile_row_index * macro_tiles_per_row) + macro_tile_column_index) * macro_tile_bytes;

    if (is_depth) {
        GLint pixel_offset = 0;

        pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
        pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
        pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
        pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
        pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
        pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
        switch (element_bytes) {
        case 2:
            pixel_offset = pixel_number * element_bytes * num_samples;
            break;
        case 4:
            /* stencil and depth data are stored separately within a tile.
             * stencil is stored in a contiguous tile before the depth tile.
             * stencil element is 1 byte, depth element is 3 bytes.
             * stencil tile is 64 bytes.
             */
            if (is_stencil)
                pixel_offset = pixel_number * 1 * num_samples;
            else
                pixel_offset = (pixel_number * 3 * num_samples) + 64;
            break;
        }
        element_offset = pixel_offset + (sample_number * element_bytes);
    } else {
        GLint sample_offset;

        switch (element_bytes) {
        case 1:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
            pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
            pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        case 2:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
            pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        case 4:
            pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
            pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
            pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
            pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
            pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
            pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
            break;
        }
        sample_offset = sample_number * (tile_bytes / num_samples);
        element_offset = sample_offset + (pixel_number * element_bytes);
    }
    total_offset = (slice_offset + macro_tile_offset) >> (channel_bits + bank_bits);
    total_offset += element_offset;

    offset_low = total_offset & group_mask;
    offset_high = (total_offset & ~group_mask) << (channel_bits + bank_bits);
    offset = (bank << (group_bits + channel_bits)) + (channel << group_bits) + offset_low + offset_high;

    return offset;
}

/* depth buffers */
static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    GLint offset;
    if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
        offset = r600_2d_tile_helper(rrb, x, y, 1, 0);
    else
        offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
    return &ptr[offset];
}

static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    GLint offset;
    if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
        offset = r600_2d_tile_helper(rrb, x, y, 1, 1);
    else
        offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
    return &ptr[offset];
}

static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE)
            offset = r600_2d_tile_helper(rrb, x, y, 0, 0);
        else
            offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
    }
    return &ptr[offset];
}

#else

/* radeon tiling on r300-r500 has 4 states,
   macro-linear/micro-linear
   macro-linear/micro-tiled
   macro-tiled /micro-linear
   macro-tiled /micro-tiled
   1 byte surface
   2 byte surface - two types - we only provide 8x2 microtiling
   4 byte surface
   8/16 byte (unused)
*/
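/* Illustrative example (my numbers): for a 4-byte, micro-tiled-only surface
 * (macro tiling off) with a 1024-byte pitch, radeon_ptr_4byte() maps pixel
 * (x=5, y=3) to offset ((3 >> 1) * 64 + (5 >> 2)) << 5, plus (3 & 1) << 4,
 * plus (5 & 3) << 2, i.e. 2080 + 16 + 4 = 2100, whereas the untiled layout
 * used for has_surface buffers would give 5 * 4 + 3 * 1024 = 3092.
 */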
static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
                offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
                offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
                offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
                offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
                offset += ((y >> 1) & 0x1) << 6;
                offset += ((x >> 2) & 0x1) << 5;
                offset += (y & 1) << 4;
                offset += (x & 3) << 2;
            } else {
                offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
                offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
                offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
                offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
                offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
                offset += (y & 1) << 6;
                offset += (x & 15) << 2;
            }
        } else {
            offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
            offset += (y & 1) << 4;
            offset += (x & 3) << 2;
        }
    }
    return &ptr[offset];
}

static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
        GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr + rrb->draw_offset;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
                offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
                offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
                offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
                offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
                offset += ((y >> 1) & 0x1) << 6;
                offset += ((x >> 3) & 0x1) << 5;
                offset += (y & 1) << 4;
                offset += (x & 3) << 2;
            } else {
                offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
                offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
                offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
                offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
                offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
                offset += (y & 1) << 6;
                offset += ((x >> 4) & 0x1) << 5;
                offset += (x & 15) << 2;
            }
        } else {
            offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
            offset += (y & 0x1) << 4;
            offset += (x & 0x7) << 1;
        }
    }
    return &ptr[offset];
}

#endif

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
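/* For orientation (my paraphrase, not the literal template expansion): each
 * spantmp2.h include below turns the LOCAL_VARS, Y_FLIP, HW_CLIPLOOP and
 * GET_VALUE/PUT_VALUE (or GET_PTR) macros into a set of per-format span
 * read/write functions plus the radeonInitPointers_* entry point that
 * radeonSetSpanFunctions() calls at the bottom of this file; the generated
 * loops walk each cliprect and access one pixel at a time, so everything
 * they touch really does come from the renderbuffer and the cliprects
 * fetched in LOCAL_VARS.
 */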
#define LOCAL_VARS \
    struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
    struct radeon_renderbuffer *rrb = (void *) rb; \
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
    unsigned int num_cliprects; \
    struct drm_clip_rect *cliprects; \
    int x_off, y_off; \
    GLuint p; \
    (void)p; \
    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_DEPTH_VARS \
    struct radeon_context *radeon = RADEON_CONTEXT(ctx); \
    struct radeon_renderbuffer *rrb = (void *) rb; \
    const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
    const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1; \
    unsigned int num_cliprects; \
    struct drm_clip_rect *cliprects; \
    int x_off, y_off; \
    radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

#define Y_FLIP(_y) ((_y) * yScale + yBias)

#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 */
#define HW_CLIPLOOP() \
    do { \
        int _nc = num_cliprects; \
        while ( _nc-- ) { \
            int minx = cliprects[_nc].x1 - x_off; \
            int miny = cliprects[_nc].y1 - y_off; \
            int maxx = cliprects[_nc].x2 - x_off; \
            int maxy = cliprects[_nc].y2 - y_off;
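
/* Note (added for clarity): the two braces opened by HW_CLIPLOOP() are not
 * closed here; they are closed by the matching HW_ENDCLIPLOOP() used inside
 * the spantmp2.h/depthtmp.h/stenciltmp.h templates, so the macro pair is
 * only balanced after template expansion.
 */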

/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE16_TO_CPU(*(GLushort*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLushort *_ptr = (GLushort*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE16(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV

#define TAG(x)    radeon##x##_RGB565_REV
#define TAG2(x,y) radeon##x##_RGB565_REV##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE16_TO_CPU(*(GLushort*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLushort *_ptr = (GLushort*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE16(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 16 bit, ARGB1555 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV

#define TAG(x)    radeon##x##_ARGB1555
#define TAG2(x,y) radeon##x##_ARGB1555##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE16_TO_CPU(*(GLushort*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLushort *_ptr = (GLushort*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE16(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5

#define TAG(x)    radeon##x##_ARGB1555_REV
#define TAG2(x,y) radeon##x##_ARGB1555_REV##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE16_TO_CPU(*(GLushort*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLushort *_ptr = (GLushort*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE16(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 16 bit, RGBA4 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV

#define TAG(x)    radeon##x##_ARGB4444
#define TAG2(x,y) radeon##x##_ARGB4444##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE16_TO_CPU(*(GLushort*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLushort *_ptr = (GLushort*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE16(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4

#define TAG(x)    radeon##x##_ARGB4444_REV
#define TAG2(x,y) radeon##x##_ARGB4444_REV##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE16_TO_CPU(*(GLushort*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLushort *_ptr = (GLushort*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE16(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 32 bit, xRGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) ((LE32_TO_CPU(*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) | 0xff000000))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32(d); \
} while (0)
#else
#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#endif
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE32_TO_CPU(*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32(d); \
} while (0)
#else
#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#endif
#include "spantmp2.h"

/* 32 bit, BGRx8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8

#define TAG(x)    radeon##x##_BGRx8888
#define TAG2(x,y) radeon##x##_BGRx8888##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) ((LE32_TO_CPU(*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) | 0x000000ff))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32(d); \
} while (0)
#else
#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = d; \
} while (0)
#endif
#include "spantmp2.h"

/* 32 bit, BGRA8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8

#define TAG(x)    radeon##x##_BGRA8888
#define TAG2(x,y) radeon##x##_BGRA8888##y
#if defined(RADEON_R600)
#define GET_VALUE(_x, _y) (LE32_TO_CPU(*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))))
#define PUT_VALUE(_x, _y, d) { \
    GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32(d); \
} while (0)
#else
#define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine. In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly. It is not quite enough to get it working with hyperz
 * too...
 */
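/* Concrete illustration (mine): for a 16-bit depth buffer that does have a
 * surface register set up (has_surface), WRITE_DEPTH(x, y, d) below resolves
 * to a plain store at x * 2 + y * pitch; without a surface, the helper
 * functions above (r200_depth_2byte / radeon_ptr_2byte_8x2) compute the
 * swizzled, tiled address instead, which is why every access goes through
 * them.
 */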

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#if defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = CPU_TO_LE16(d)
#else
#define WRITE_DEPTH( _x, _y, d ) \
    *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
#endif

#if defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
    d = LE16_TO_CPU(*(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off))
#else
#define READ_DEPTH( d, _x, _y ) \
    d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
#endif

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
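/* Worked example (mine, derived from the macros below): with depth value
 * d = 0x123456, the R300 path keeps the stencil bits in 7:0 and stores the
 * depth in bits 31:8, giving 0x123456ss, while the R200/R600/default paths
 * keep stencil in bits 31:24 and store depth in bits 23:0, giving
 * 0xss123456 (ss = existing stencil byte).
 */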
#define VALUE_TYPE GLuint

#if defined(RADEON_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0x000000ff; \
    tmp |= ((d << 8) & 0xffffff00); \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#elif defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#endif

#if defined(RADEON_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0xffffff00) >> 8; \
} while (0)
#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (LE32_TO_CPU(*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff); \
} while (0)
#elif defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
} while (0)
#else
#define READ_DEPTH( d, _x, _y ) \
    d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) & 0x00ffffff;
#endif

#define TAG(x) radeon##x##_z24
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 * EXT_depth_stencil
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
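/* Worked example (mine): the s8_z24 value handed to WRITE_DEPTH packs the
 * stencil in bits 31:24 and depth in bits 23:0, e.g. d = 0xab123456.  The
 * R300 macro below rotates that into ZZZS order,
 * ((d & 0xff000000) >> 24) | ((d & 0x00ffffff) << 8) == 0x123456ab, while
 * the R200 and default macros store d unchanged and the R600 macro splits
 * the depth and stencil bytes into their separate tiles.
 */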
#define VALUE_TYPE GLuint

#if defined(RADEON_R300)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32((((d) & 0xff000000) >> 24) | (((d) & 0x00ffffff) << 8)); \
} while (0)
#elif defined(RADEON_R600)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xff000000; \
    tmp |= ((d) & 0x00ffffff); \
    *_ptr = CPU_TO_LE32(tmp); \
    _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
    tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xffffff00; \
    tmp |= ((d) >> 24) & 0xff; \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#elif defined(RADEON_R200)
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32(d); \
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    *_ptr = CPU_TO_LE32(d); \
} while (0)
#endif

#if defined(RADEON_R300)
#define READ_DEPTH( d, _x, _y ) \
do { \
    GLuint tmp = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
    d = LE32_TO_CPU(((tmp & 0x000000ff) << 24) | ((tmp & 0xffffff00) >> 8)); \
} while (0)
#elif defined(RADEON_R600)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = (LE32_TO_CPU(*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff); \
    d |= ((LE32_TO_CPU(*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000); \
} while (0)
#elif defined(RADEON_R200)
#define READ_DEPTH( d, _x, _y ) \
do { \
    d = LE32_TO_CPU(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \
} while (0)
#else
#define READ_DEPTH( d, _x, _y ) do { \
    d = LE32_TO_CPU(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \
} while (0)
#endif

#define TAG(x) radeon##x##_s8_z24
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef RADEON_R300
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xffffff00; \
    tmp |= (d) & 0xff; \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#elif defined(RADEON_R600)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0xffffff00; \
    tmp |= (d) & 0xff; \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#elif defined(RADEON_R200)
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0x00ffffff; \
    tmp |= (((d) & 0xff) << 24); \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    tmp &= 0x00ffffff; \
    tmp |= (((d) & 0xff) << 24); \
    *_ptr = CPU_TO_LE32(tmp); \
} while (0)
#endif

#ifdef RADEON_R300
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    d = tmp & 0x000000ff; \
} while (0)
#elif defined(RADEON_R600)
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    d = tmp & 0x000000ff; \
} while (0)
#elif defined(RADEON_R200)
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    d = (tmp & 0xff000000) >> 24; \
} while (0)
#else
#define READ_STENCIL( d, _x, _y ) \
do { \
    GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \
    GLuint tmp = LE32_TO_CPU(*_ptr); \
    d = (tmp & 0xff000000) >> 24; \
} while (0)
#endif

#define TAG(x) radeon##x##_s8_z24
#include "stenciltmp.h"


static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
{
    struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
    int r;

    if (rrb == NULL || !rrb->bo)
        return;

    radeon_print(RADEON_MEMORY, RADEON_TRACE,
            "%s( rb %p, flag %s )\n",
            __func__, rb, flag ? "true":"false");

    if (flag) {
        radeon_bo_wait(rrb->bo);
        r = radeon_bo_map(rrb->bo, 1);
        if (r) {
            fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
                    __FUNCTION__, r);
        }

        radeonSetSpanFunctions(rrb);
    } else {
        radeon_bo_unmap(rrb->bo);
        rb->GetRow = NULL;
        rb->PutRow = NULL;
    }
}

static void
radeon_map_unmap_framebuffer(struct gl_context *ctx, struct gl_framebuffer *fb,
        GLboolean map)
{
    GLuint i, j;

    radeon_print(RADEON_MEMORY, RADEON_TRACE,
            "%s( %p , fb %p, map %s )\n",
            __func__, ctx, fb, map ? "true":"false");

    /* color draw buffers */
    for (j = 0; j < fb->_NumColorDrawBuffers; j++)
        map_unmap_rb(fb->_ColorDrawBuffers[j], map);

    map_unmap_rb(fb->_ColorReadBuffer, map);

    /* check for render to textures */
    for (i = 0; i < BUFFER_COUNT; i++) {
        struct gl_renderbuffer_attachment *att =
            fb->Attachment + i;
        struct gl_texture_object *tex = att->Texture;
        if (tex) {
            /* Render to texture. Note that a mipmapped texture need not
             * be complete for render to texture, so we must restrict to
             * mapping only the attached image.
             */
            radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
            ASSERT(att->Renderbuffer);

            if (map)
                radeon_teximage_map(image, GL_TRUE);
            else
                radeon_teximage_unmap(image);
        }
    }

    /* depth buffer (Note wrapper!) */
    if (fb->_DepthBuffer)
        map_unmap_rb(fb->_DepthBuffer->Wrapped, map);

    if (fb->_StencilBuffer)
        map_unmap_rb(fb->_StencilBuffer->Wrapped, map);

    radeon_check_front_buffer_rendering(ctx);
}

static void radeonSpanRenderStart(struct gl_context * ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    radeon_firevertices(rmesa);

    /* The locking and wait for idle should really only be needed in classic mode.
     * In a future memory manager based implementation, this should become
     * unnecessary due to the fact that mapping our buffers, textures, etc.
     * should implicitly wait for any previous rendering commands that must
     * be waited on. */
    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        LOCK_HARDWARE(rmesa);
        radeonWaitForIdleLocked(rmesa);
    }

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_TRUE);
    if (ctx->ReadBuffer != ctx->DrawBuffer)
        radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_TRUE);
}

static void radeonSpanRenderFinish(struct gl_context * ctx)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    int i;

    _swrast_flush(ctx);

    for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
        if (ctx->Texture.Unit[i]._ReallyEnabled)
            ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
    }

    radeon_map_unmap_framebuffer(ctx, ctx->DrawBuffer, GL_FALSE);
    if (ctx->ReadBuffer != ctx->DrawBuffer)
        radeon_map_unmap_framebuffer(ctx, ctx->ReadBuffer, GL_FALSE);

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        UNLOCK_HARDWARE(rmesa);
    }
}

void radeonInitSpanFuncs(struct gl_context * ctx)
{
    struct swrast_device_driver *swdd =
        _swrast_GetDeviceDriverReference(ctx);
    swdd->SpanRenderStart = radeonSpanRenderStart;
    swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put routines for the given driRenderbuffer.
 */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
    if (rrb->base.Format == MESA_FORMAT_RGB565) {
        radeonInitPointers_RGB565(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_RGB565_REV) {
        radeonInitPointers_RGB565_REV(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_XRGB8888) {
        radeonInitPointers_xRGB8888(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_XRGB8888_REV) {
        radeonInitPointers_BGRx8888(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_ARGB8888) {
        radeonInitPointers_ARGB8888(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_ARGB8888_REV) {
        radeonInitPointers_BGRA8888(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
        radeonInitPointers_ARGB4444(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_ARGB4444_REV) {
        radeonInitPointers_ARGB4444_REV(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
        radeonInitPointers_ARGB1555(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_ARGB1555_REV) {
        radeonInitPointers_ARGB1555_REV(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_Z16) {
        radeonInitDepthPointers_z16(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_X8_Z24) {
        radeonInitDepthPointers_z24(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_S8_Z24) {
        radeonInitDepthPointers_s8_z24(&rrb->base);
    } else if (rrb->base.Format == MESA_FORMAT_S8) {
        radeonInitStencilPointers_s8_z24(&rrb->base);
    } else {
        fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
    }
}